2007-01-03 23:46:08

by Eric Sandeen

[permalink] [raw]
Subject: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Take 2... all in one file. I suppose I really did know better than
to create that new header. ;-)

Better?

---

CVE-2006-5753 is for a case where an inode can be marked bad, switching
the ops to bad_inode_ops, which are all connected as:

static int return_EIO(void)
{
return -EIO;
}

#define EIO_ERROR ((void *) (return_EIO))

static struct inode_operations bad_inode_ops =
{
.create = bad_inode_create
...etc...

The problem here is that the void cast causes return types to not be
promoted, and for ops such as listxattr which expect more than 32 bits of
return value, the 32-bit -EIO is interpreted as a large positive 64-bit
number, i.e. 0x00000000fffffffa instead of 0xfffffffa.

This goes particularly badly when the return value is taken as a number of
bytes to copy into, say, a user's buffer for example...

I originally had coded up the fix by creating a return_EIO_<TYPE> macro
for each return type, like this:

static int return_EIO_int(void)
{
return -EIO;
}
#define EIO_ERROR_INT ((void *) (return_EIO_int))

static struct inode_operations bad_inode_ops =
{
.create = EIO_ERROR_INT,
...etc...

but Al felt that it was probably better to create an EIO-returner for each
actual op signature. Since so few ops share a signature, I just went ahead
& created an EIO function for each individual file & inode op that returns
a value.

Thanks,

-Eric

Signed-off-by: Eric Sandeen <[email protected]>

Index: linux-2.6.19/fs/bad_inode.c
===================================================================
--- linux-2.6.19.orig/fs/bad_inode.c
+++ linux-2.6.19/fs/bad_inode.c
@@ -14,59 +14,308 @@
#include <linux/time.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
+#include <linux/poll.h>

-static int return_EIO(void)
+
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_read(struct file *filp, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_write(struct file *filp, const char __user *buf,
+ size_t siz, loff_t *ppos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EIO;
+}
+
+static int bad_file_readdir(struct file * filp, void * dirent,
+ filldir_t filldir)
+{
+ return -EIO;
+}
+
+static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
+{
+ return -EIO;
+}
+
+static int bad_file_ioctl (struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long arg)
{
return -EIO;
}

-#define EIO_ERROR ((void *) (return_EIO))
+static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+{
+ return -EIO;
+}
+
+static long bad_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EIO;
+}
+
+static int bad_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+ return -EIO;
+}
+
+static int bad_file_open(struct inode * inode, struct file * filp)
+{
+ return -EIO;
+}
+
+static int bad_file_flush(struct file *file, fl_owner_t id)
+{
+ return -EIO;
+}
+
+static int bad_file_release(struct inode * inode, struct file * filp)
+{
+ return -EIO;
+}
+
+static int bad_file_fsync(struct file * file, struct dentry *dentry,
+ int datasync)
+{
+ return -EIO;
+}
+
+static int bad_file_aio_fsync(struct kiocb *iocb, int datasync)
+{
+ return -EIO;
+}
+
+static int bad_file_fasync(int fd, struct file *filp, int on)
+{
+ return -EIO;
+}
+
+static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
+ size_t count, read_actor_t actor, void *target)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_sendpage(struct file *file, struct page *page,
+ int off, size_t len, loff_t *pos, int more)
+{
+ return -EIO;
+}
+
+static unsigned long bad_file_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ return -EIO;
+}
+
+static int bad_file_check_flags(int flags)
+{
+ return -EIO;
+}
+
+static int bad_file_dir_notify(struct file * file, unsigned long arg)
+{
+ return -EIO;
+}
+
+static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len,
+ unsigned int flags)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ return -EIO;
+}

static const struct file_operations bad_file_ops =
{
- .llseek = EIO_ERROR,
- .aio_read = EIO_ERROR,
- .read = EIO_ERROR,
- .write = EIO_ERROR,
- .aio_write = EIO_ERROR,
- .readdir = EIO_ERROR,
- .poll = EIO_ERROR,
- .ioctl = EIO_ERROR,
- .mmap = EIO_ERROR,
- .open = EIO_ERROR,
- .flush = EIO_ERROR,
- .release = EIO_ERROR,
- .fsync = EIO_ERROR,
- .aio_fsync = EIO_ERROR,
- .fasync = EIO_ERROR,
- .lock = EIO_ERROR,
- .sendfile = EIO_ERROR,
- .sendpage = EIO_ERROR,
- .get_unmapped_area = EIO_ERROR,
+ .llseek = bad_file_llseek,
+ .read = bad_file_read,
+ .write = bad_file_write,
+ .aio_read = bad_file_aio_read,
+ .aio_write = bad_file_aio_write,
+ .readdir = bad_file_readdir,
+ .poll = bad_file_poll,
+ .ioctl = bad_file_ioctl,
+ .unlocked_ioctl = bad_file_unlocked_ioctl,
+ .compat_ioctl = bad_file_compat_ioctl,
+ .mmap = bad_file_mmap,
+ .open = bad_file_open,
+ .flush = bad_file_flush,
+ .release = bad_file_release,
+ .fsync = bad_file_fsync,
+ .aio_fsync = bad_file_aio_fsync,
+ .fasync = bad_file_fasync,
+ .lock = bad_file_lock,
+ .sendfile = bad_file_sendfile,
+ .sendpage = bad_file_sendpage,
+ .get_unmapped_area = bad_file_get_unmapped_area,
+ .check_flags = bad_file_check_flags,
+ .dir_notify = bad_file_dir_notify,
+ .flock = bad_file_flock,
+ .splice_write = bad_file_splice_write,
+ .splice_read = bad_file_splice_read,
};

+static int bad_inode_create (struct inode * dir, struct dentry * dentry,
+ int mode, struct nameidata *nd)
+{
+ return -EIO;
+}
+
+static struct dentry *bad_inode_lookup(struct inode * dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ return ERR_PTR(-EIO);
+}
+
+static int bad_inode_link (struct dentry * old_dentry, struct inode * dir,
+ struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_unlink(struct inode * dir, struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_symlink (struct inode * dir, struct dentry *dentry,
+ const char * symname)
+{
+ return -EIO;
+}
+
+static int bad_inode_mkdir(struct inode * dir, struct dentry * dentry,
+ int mode)
+{
+ return -EIO;
+}
+
+static int bad_inode_rmdir (struct inode * dir, struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_mknod (struct inode * dir, struct dentry *dentry,
+ int mode, dev_t rdev)
+{
+ return -EIO;
+}
+
+static int bad_inode_rename (struct inode * old_dir, struct dentry *old_dentry,
+ struct inode * new_dir, struct dentry *new_dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+{
+ return -EIO;
+}
+
+static int bad_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ return -EIO;
+}
+
+static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ return -EIO;
+}
+
+static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+ return -EIO;
+}
+
+static int bad_inode_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return -EIO;
+}
+
+static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
+{
+ return -EIO;
+}
+
+static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
+ size_t buffer_size)
+{
+ return -EIO;
+}
+
+static int bad_inode_removexattr(struct dentry *dentry, const char *name)
+{
+ return -EIO;
+}
+
static struct inode_operations bad_inode_ops =
{
- .create = EIO_ERROR,
- .lookup = EIO_ERROR,
- .link = EIO_ERROR,
- .unlink = EIO_ERROR,
- .symlink = EIO_ERROR,
- .mkdir = EIO_ERROR,
- .rmdir = EIO_ERROR,
- .mknod = EIO_ERROR,
- .rename = EIO_ERROR,
- .readlink = EIO_ERROR,
+ .create = bad_inode_create,
+ .lookup = bad_inode_lookup,
+ .link = bad_inode_link,
+ .unlink = bad_inode_unlink,
+ .symlink = bad_inode_symlink,
+ .mkdir = bad_inode_mkdir,
+ .rmdir = bad_inode_rmdir,
+ .mknod = bad_inode_mknod,
+ .rename = bad_inode_rename,
+ .readlink = bad_inode_readlink,
/* follow_link must be no-op, otherwise unmounting this inode
won't work */
- .truncate = EIO_ERROR,
- .permission = EIO_ERROR,
- .getattr = EIO_ERROR,
- .setattr = EIO_ERROR,
- .setxattr = EIO_ERROR,
- .getxattr = EIO_ERROR,
- .listxattr = EIO_ERROR,
- .removexattr = EIO_ERROR,
+ /* put_link returns void */
+ /* truncate returns void */
+ .permission = bad_inode_permission,
+ .getattr = bad_inode_getattr,
+ .setattr = bad_inode_setattr,
+ .setxattr = bad_inode_setxattr,
+ .getxattr = bad_inode_getxattr,
+ .listxattr = bad_inode_listxattr,
+ .removexattr = bad_inode_removexattr,
+ /* truncate_range returns void */
};






2007-01-04 00:26:56

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Wed, 03 Jan 2007 17:46:00 -0600
Eric Sandeen <[email protected]> wrote:

> Take 2... all in one file. I suppose I really did know better than
> to create that new header. ;-)
>
> Better?
>
> ---
>
> CVE-2006-5753 is for a case where an inode can be marked bad, switching
> the ops to bad_inode_ops, which are all connected as:
>
> static int return_EIO(void)
> {
> return -EIO;
> }
>
> #define EIO_ERROR ((void *) (return_EIO))
>
> static struct inode_operations bad_inode_ops =
> {
> .create = bad_inode_create
> ...etc...
>
> The problem here is that the void cast causes return types to not be
> promoted, and for ops such as listxattr which expect more than 32 bits of
> return value, the 32-bit -EIO is interpreted as a large positive 64-bit
> number, i.e. 0x00000000fffffffa instead of 0xfffffffa.
>
> This goes particularly badly when the return value is taken as a number of
> bytes to copy into, say, a user's buffer for example...
>
> I originally had coded up the fix by creating a return_EIO_<TYPE> macro
> for each return type, like this:
>
> static int return_EIO_int(void)
> {
> return -EIO;
> }
> #define EIO_ERROR_INT ((void *) (return_EIO_int))
>
> static struct inode_operations bad_inode_ops =
> {
> .create = EIO_ERROR_INT,
> ...etc...
>
> but Al felt that it was probably better to create an EIO-returner for each
> actual op signature. Since so few ops share a signature, I just went ahead
> & created an EIO function for each individual file & inode op that returns
> a value.
>

Al is correct, of course. But the patch takes bad_inode.o from 280 up to 703
bytes, which is a bit sad for some cosmetic thing which nobody ever looks
at or modifies.

Perhaps you can do

static int return_EIO_int(void)
{
return -EIO;
}

static int bad_file_release(struct inode * inode, struct file * filp)
__attribute__((alias("return_EIO_int")));
static int bad_file_fsync(struct inode * inode, struct file * filp)
__attribute__((alias("return_EIO_int")));

etcetera?

2007-01-04 17:51:15

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Andrew Morton wrote:

> Al is correct, of course. But the patch takes bad_inode.o from 280 up to 703
> bytes, which is a bit sad for some cosmetic thing which nobody ever looks
> at or modifies.
>
> Perhaps you can do
>
> static int return_EIO_int(void)
> {
> return -EIO;
> }
>
> static int bad_file_release(struct inode * inode, struct file * filp)
> __attribute__((alias("return_EIO_int")));
> static int bad_file_fsync(struct inode * inode, struct file * filp)
> __attribute__((alias("return_EIO_int")));
>
> etcetera?
Ok, try this on for size. Even though the gcc manual says alias doesn't work
on all target machines, I assume linux arches are ok since alias is used
in the core module init & exit code...

Also - is it ok to alias a function with one signature to a function with
another signature?

Note... I also realized that there are a couple of file ops which expect unsigned
returns... poll and get_unmapped_area. The latter seems to be handled just fine by
the caller, which does IS_ERR gyrations to check for errnos.

I'm not so sure about poll; some callers put the return in a signed int, others
unsigned, not sure anyone is really checking for -EIO... I think this op should
probably be returning POLLERR, so that's what I've got in this version.

Anyway, here's the latest stab at this:

Signed-off-by: Eric Sandeen <[email protected]>

Index: linux-2.6.19/fs/bad_inode.c
===================================================================
--- linux-2.6.19.orig/fs/bad_inode.c
+++ linux-2.6.19/fs/bad_inode.c
@@ -14,59 +14,255 @@
#include <linux/time.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
+#include <linux/poll.h>

-static int return_EIO(void)
+/* Generic EIO-returners, for different types of return values */
+
+static int return_EIO_int(void)
+{
+ return -EIO;
+}
+
+static ssize_t return_EIO_ssize(void)
+{
+ return -EIO;
+}
+
+static long return_EIO_long(void)
+{
+ return -EIO;
+}
+
+static loff_t return_EIO_loff(void)
{
return -EIO;
}

-#define EIO_ERROR ((void *) (return_EIO))
+static long * return_EIO_ptr(void)
+{
+ return ERR_PTR(-EIO);
+}
+
+/* All the specific file ops, aliased to something with the right retval */
+
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+ __attribute__((alias("return_EIO_loff")));
+
+static ssize_t bad_file_read(struct file *filp, char __user *buf,
+ size_t size, loff_t *ppos)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_file_write(struct file *filp, const char __user *buf,
+ size_t siz, loff_t *ppos)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+ __attribute__((alias("return_EIO_ssize")));
+
+static int bad_file_readdir(struct file * filp, void * dirent,
+ filldir_t filldir)
+ __attribute__((alias("return_EIO_int")));
+
+static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
+{
+ return POLLERR;
+}
+
+static int bad_file_ioctl (struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long arg)
+ __attribute__((alias("return_EIO_int")));
+
+static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+ __attribute__((alias("return_EIO_long")));
+
+static long bad_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+ __attribute__((alias("return_EIO_long")));
+
+static int bad_file_mmap(struct file * file, struct vm_area_struct * vma)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_open(struct inode * inode, struct file * filp)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_flush(struct file *file, fl_owner_t id)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_release(struct inode * inode, struct file * filp)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_fsync(struct file * file, struct dentry *dentry,
+ int datasync)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_aio_fsync(struct kiocb *iocb, int datasync)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_fasync(int fd, struct file *filp, int on)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
+ __attribute__((alias("return_EIO_int")));
+
+static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
+ size_t count, read_actor_t actor, void *target)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_file_sendpage(struct file *file, struct page *page,
+ int off, size_t len, loff_t *pos, int more)
+ __attribute__((alias("return_EIO_ssize")));
+
+/* caller must use IS_ERR to check for negative error returns */
+static unsigned long bad_file_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+ __attribute__((alias("return_EIO_long")));
+
+static int bad_file_check_flags(int flags)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_dir_notify(struct file * file, unsigned long arg)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
+ __attribute__((alias("return_EIO_int")));
+
+static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len,
+ unsigned int flags)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+ __attribute__((alias("return_EIO_ssize")));

static const struct file_operations bad_file_ops =
{
- .llseek = EIO_ERROR,
- .aio_read = EIO_ERROR,
- .read = EIO_ERROR,
- .write = EIO_ERROR,
- .aio_write = EIO_ERROR,
- .readdir = EIO_ERROR,
- .poll = EIO_ERROR,
- .ioctl = EIO_ERROR,
- .mmap = EIO_ERROR,
- .open = EIO_ERROR,
- .flush = EIO_ERROR,
- .release = EIO_ERROR,
- .fsync = EIO_ERROR,
- .aio_fsync = EIO_ERROR,
- .fasync = EIO_ERROR,
- .lock = EIO_ERROR,
- .sendfile = EIO_ERROR,
- .sendpage = EIO_ERROR,
- .get_unmapped_area = EIO_ERROR,
+ .llseek = bad_file_llseek,
+ .read = bad_file_read,
+ .write = bad_file_write,
+ .aio_read = bad_file_aio_read,
+ .aio_write = bad_file_aio_write,
+ .readdir = bad_file_readdir,
+ .poll = bad_file_poll,
+ .ioctl = bad_file_ioctl,
+ .unlocked_ioctl = bad_file_unlocked_ioctl,
+ .compat_ioctl = bad_file_compat_ioctl,
+ .mmap = bad_file_mmap,
+ .open = bad_file_open,
+ .flush = bad_file_flush,
+ .release = bad_file_release,
+ .fsync = bad_file_fsync,
+ .aio_fsync = bad_file_aio_fsync,
+ .fasync = bad_file_fasync,
+ .lock = bad_file_lock,
+ .sendfile = bad_file_sendfile,
+ .sendpage = bad_file_sendpage,
+ .get_unmapped_area = bad_file_get_unmapped_area,
+ .check_flags = bad_file_check_flags,
+ .dir_notify = bad_file_dir_notify,
+ .flock = bad_file_flock,
+ .splice_write = bad_file_splice_write,
+ .splice_read = bad_file_splice_read,
};

+/* All the specific inode ops, aliased to something with the right retval */
+
+static int bad_inode_create (struct inode * dir, struct dentry * dentry,
+ int mode, struct nameidata *nd)
+ __attribute__((alias("return_EIO_int")));
+
+static struct dentry *bad_inode_lookup(struct inode * dir,
+ struct dentry *dentry, struct nameidata *nd)
+ __attribute__((alias("return_EIO_ptr")));
+
+static int bad_inode_link (struct dentry * old_dentry, struct inode * dir,
+ struct dentry *dentry)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_unlink(struct inode * dir, struct dentry *dentry)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_symlink (struct inode * dir, struct dentry *dentry,
+ const char * symname)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_mkdir(struct inode * dir, struct dentry * dentry,
+ int mode)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_rmdir (struct inode * dir, struct dentry *dentry)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_mknod (struct inode * dir, struct dentry *dentry,
+ int mode, dev_t rdev)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_rename (struct inode * old_dir, struct dentry *old_dentry,
+ struct inode * new_dir, struct dentry *new_dentry)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+ __attribute__((alias("return_EIO_int")));
+
+static int bad_inode_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+ __attribute__((alias("return_EIO_int")));
+
+static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
+ __attribute__((alias("return_EIO_ssize")));
+
+static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
+ size_t buffer_size)
+ __attribute__((alias("return_EIO_ssize")));
+
+static int bad_inode_removexattr(struct dentry *dentry, const char *name)
+ __attribute__((alias("return_EIO_int")));
+
static struct inode_operations bad_inode_ops =
{
- .create = EIO_ERROR,
- .lookup = EIO_ERROR,
- .link = EIO_ERROR,
- .unlink = EIO_ERROR,
- .symlink = EIO_ERROR,
- .mkdir = EIO_ERROR,
- .rmdir = EIO_ERROR,
- .mknod = EIO_ERROR,
- .rename = EIO_ERROR,
- .readlink = EIO_ERROR,
+ .create = bad_inode_create,
+ .lookup = bad_inode_lookup,
+ .link = bad_inode_link,
+ .unlink = bad_inode_unlink,
+ .symlink = bad_inode_symlink,
+ .mkdir = bad_inode_mkdir,
+ .rmdir = bad_inode_rmdir,
+ .mknod = bad_inode_mknod,
+ .rename = bad_inode_rename,
+ .readlink = bad_inode_readlink,
/* follow_link must be no-op, otherwise unmounting this inode
won't work */
- .truncate = EIO_ERROR,
- .permission = EIO_ERROR,
- .getattr = EIO_ERROR,
- .setattr = EIO_ERROR,
- .setxattr = EIO_ERROR,
- .getxattr = EIO_ERROR,
- .listxattr = EIO_ERROR,
- .removexattr = EIO_ERROR,
+ /* put_link returns void */
+ /* truncate returns void */
+ .permission = bad_inode_permission,
+ .getattr = bad_inode_getattr,
+ .setattr = bad_inode_setattr,
+ .setxattr = bad_inode_setxattr,
+ .getxattr = bad_inode_getxattr,
+ .listxattr = bad_inode_listxattr,
+ .removexattr = bad_inode_removexattr,
+ /* truncate_range returns void */
};



2007-01-04 18:27:17

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 04 Jan 2007 11:51:10 -0600
Eric Sandeen <[email protected]> wrote:

> Andrew Morton wrote:
>
> > Al is correct, of course. But the patch takes bad_inode.o from 280 up to 703
> > bytes, which is a bit sad for some cosmetic thing which nobody ever looks
> > at or modifies.
> >
> > Perhaps you can do
> >
> > static int return_EIO_int(void)
> > {
> > return -EIO;
> > }
> >
> > static int bad_file_release(struct inode * inode, struct file * filp)
> > __attribute__((alias("return_EIO_int")));
> > static int bad_file_fsync(struct inode * inode, struct file * filp)
> > __attribute__((alias("return_EIO_int")));
> >
> > etcetera?
> Ok, try this on for size. Even though the gcc manual says alias doesn't work
> on all target machines, I assume linux arches are ok since alias is used
> in the core module init & exit code...
>
> Also - is it ok to alias a function with one signature to a function with
> another signature?

Ordinarily I'd say no wucking fay, but that's effectively what we've been
doing in there for ages, and it seems to work.

I'd be a bit worried if any of these functions were returning pointers,
because one could certainly conceive of an arch+compiler combo which
returns pointers in a different register from integers (680x0?) but that's
not happening here.

> Note... I also realized that there are a couple of file ops which expect unsigned
> returns... poll and get_unmapped_area. The latter seems to be handled just fine by
> the caller, which does IS_ERR gyrations to check for errnos.
>
> I'm not so sure about poll; some callers put the return in a signed int, others
> unsigned, not sure anyone is really checking for -EIO... I think this op should
> probably be returning POLLERR, so that's what I've got in this version.

Yeah, that should all be OK.

2007-01-04 18:34:06

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Andrew Morton wrote:
> On Thu, 04 Jan 2007 11:51:10 -0600
> Eric Sandeen <[email protected]> wrote:

>> Also - is it ok to alias a function with one signature to a function with
>> another signature?
>
> Ordinarily I'd say no wucking fay, but that's effectively what we've been
> doing in there for ages, and it seems to work.

Hmm that gives me a lot of confidence ;-) I'd hate to carry along bad
assumptions while we try to make this all kosher... but I'm willing to
defer to popular opinion on this one....

> I'd be a bit worried if any of these functions were returning pointers,
> because one could certainly conceive of an arch+compiler combo which
> returns pointers in a different register from integers (680x0?) but that's
> not happening here.

Well, one is...

static long * return_EIO_ptr(void)
{
return ERR_PTR(-EIO);
}
...
static struct dentry *bad_inode_lookup(struct inode * dir,
struct dentry *dentry, struct nameidata *nd)
__attribute__((alias("return_EIO_ptr")));

Maybe it'd be better to lose the alias in this case then? and go back
to this:

static struct dentry *bad_inode_lookup(struct inode * dir,
struct dentry *dentry, struct nameidata *nd)
{
return ERR_PTR(-EIO);
}

Thanks,
-Eric

2007-01-04 18:55:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 04 Jan 2007 12:33:59 -0600
Eric Sandeen <[email protected]> wrote:

> Andrew Morton wrote:
> > On Thu, 04 Jan 2007 11:51:10 -0600
> > Eric Sandeen <[email protected]> wrote:
>
> >> Also - is it ok to alias a function with one signature to a function with
> >> another signature?
> >
> > Ordinarily I'd say no wucking fay, but that's effectively what we've been
> > doing in there for ages, and it seems to work.
>
> Hmm that gives me a lot of confidence ;-) I'd hate to carry along bad
> assumptions while we try to make this all kosher... but I'm willing to
> defer to popular opinion on this one....

yeah, I'm a bit wobbly about it. Linus, what do you think?

> > I'd be a bit worried if any of these functions were returning pointers,
> > because one could certainly conceive of an arch+compiler combo which
> > returns pointers in a different register from integers (680x0?) but that's
> > not happening here.
>
> Well, one is...
>
> static long * return_EIO_ptr(void)
> {
> return ERR_PTR(-EIO);
> }
> ...
> static struct dentry *bad_inode_lookup(struct inode * dir,
> struct dentry *dentry, struct nameidata *nd)
> __attribute__((alias("return_EIO_ptr")));
>
> Maybe it'd be better to lose the alias in this case then? and go back
> to this:
>
> static struct dentry *bad_inode_lookup(struct inode * dir,
> struct dentry *dentry, struct nameidata *nd)
> {
> return ERR_PTR(-EIO);
> }

A bit saner, but again, the old code used the same function for *everything*
and apart from the 32/64-bit thing, it worked.

Half a kb isn't much of course, but we've done lots of changes for a lot
less...


2007-01-04 19:09:43

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Andrew Morton wrote:

> On Thu, 04 Jan 2007 12:33:59 -0600
> Eric Sandeen <[email protected]> wrote:
>
> > Andrew Morton wrote:
> > > On Thu, 04 Jan 2007 11:51:10 -0600
> > > Eric Sandeen <[email protected]> wrote:
> >
> > >> Also - is it ok to alias a function with one signature to a function with
> > >> another signature?
> > >
> > > Ordinarily I'd say no wucking fay, but that's effectively what we've been
> > > doing in there for ages, and it seems to work.
> >
> > Hmm that gives me a lot of confidence ;-) I'd hate to carry along bad
> > assumptions while we try to make this all kosher... but I'm willing to
> > defer to popular opinion on this one....
>
> yeah, I'm a bit wobbly about it. Linus, what do you think?

I don't much care. If we ever find an architecture where it matters, we'll
unalias them. In the meantime, we've for the longest time just known that
calling conventions don't care about argument types on all the
architectures we've been on, so we've aliased things to the same function.

But it's not very common, so we can stop doing it.

But I'd argue we should only do it if there is an actual
honest-to-goodness reason to do so. Usually it only matters for

- return types are fundamentally different (FP vs integer vs pointer)

- calling convention has callee popping the arguments (normal in Pascal,
very unusual in C, because it also breaks lots of historical code,
and is simply not workable with K&R C, where perfectly normal things
like "open()" take either two or three arguments without being
varargs).

In general, this just isn't an issue for the kernel. Other systems have
had basically NO typing what-so-ever for functions, and use aliasing much
more extensively. We only do it for a few ratehr rare things.

Linus

2007-01-04 19:14:55

by Al Viro

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, Jan 04, 2007 at 11:09:31AM -0800, Linus Torvalds wrote:

> But I'd argue we should only do it if there is an actual
> honest-to-goodness reason to do so.

How about "makes call graph analysis easier"? ;-) In principle, I have
no problem with force-casting, but it'd better be cast to the right
type...

(And yes, there's a bunch of sparse-based fun in making dealing with
call graph analysis and sane annotations needed for that).

2007-01-04 19:22:29

by Al Viro

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, Jan 04, 2007 at 07:14:51PM +0000, Al Viro wrote:
> On Thu, Jan 04, 2007 at 11:09:31AM -0800, Linus Torvalds wrote:
>
> > But I'd argue we should only do it if there is an actual
> > honest-to-goodness reason to do so.
>
> How about "makes call graph analysis easier"? ;-) In principle, I have
> no problem with force-casting, but it'd better be cast to the right
> type...
>
> (And yes, there's a bunch of sparse-based fun in making dealing with
> call graph analysis and sane annotations needed for that).

PS: what would be the sane strategy for timer series merge, BTW? It
touches a whole lot of files in rather trivial ways (see below for current
stat), but it's gradually mergable and after the first 4 chunks the rest
can go in independently (per-driver, if we want to go for insane length,
but most of those will be absolutely trivial and I'd rather lump them
into bigger groups). And those 4 chunks in the beginning of series are
safe to merge at any point - result in guaranteed to be identical code...

arch/alpha/kernel/srmcons.c | 8 +--
arch/arm/common/sharpsl_pm.c | 10 ++--
arch/arm/mach-iop32x/n2100.c | 5 +-
arch/arm/mach-pxa/lubbock.c | 12 ++---
arch/i386/kernel/time.c | 6 +-
arch/i386/kernel/tsc.c | 5 +-
arch/i386/mach-voyager/voyager_thread.c | 5 +-
arch/ia64/kernel/mca.c | 12 ++---
arch/ia64/kernel/salinfo.c | 5 +-
arch/ia64/sn/kernel/bte.c | 7 +--
arch/ia64/sn/kernel/bte_error.c | 17 ++----
arch/ia64/sn/kernel/huberror.c | 2 -
arch/ia64/sn/kernel/mca.c | 5 +-
arch/ia64/sn/kernel/xpc_channel.c | 4 --
arch/ia64/sn/kernel/xpc_main.c | 18 ++-----
arch/mips/lasat/picvue_proc.c | 5 +-
arch/mips/sgi-ip22/ip22-reset.c | 20 +++-----
arch/mips/sgi-ip32/ip32-reset.c | 10 ++--
arch/powerpc/oprofile/op_model_cell.c | 6 +-
arch/powerpc/platforms/chrp/chrp.h | 2 -
arch/powerpc/platforms/chrp/setup.c | 4 +-
arch/powerpc/platforms/powermac/low_i2c.c | 7 +--
arch/ppc/syslib/m8xx_wdt.c | 10 ++--
arch/s390/mm/cmm.c | 23 +++------
arch/sh/drivers/push-switch.c | 9 +--
arch/um/drivers/net_kern.c | 7 +--
arch/x86_64/kernel/pci-calgary.c | 7 +--
arch/xtensa/platform-iss/console.c | 10 +---
arch/xtensa/platform-iss/network.c | 13 ++---
block/as-iosched.c | 7 +--
block/cfq-iosched.c | 15 ++----
block/ll_rw_blk.c | 9 +--
drivers/acpi/sbs.c | 7 +--
drivers/acpi/thermal.c | 26 +++-------
drivers/atm/ambassador.c | 10 +---
drivers/atm/firestream.c | 10 +---
drivers/atm/horizon.c | 11 +---
drivers/atm/idt77252.c | 14 ++---
drivers/atm/lanai.c | 7 +--
drivers/atm/nicstar.c | 8 +--
drivers/atm/suni.c | 8 +--
drivers/base/firmware_class.c | 11 ----
drivers/block/DAC960.c | 8 +--
drivers/block/DAC960.h | 2 -
drivers/block/cpqarray.c | 10 +---
drivers/block/swim3.c | 43 +++++-----------
drivers/block/ub.c | 22 ++------
drivers/block/umem.c | 5 +-
drivers/block/xd.c | 4 +-
drivers/block/xd.h | 2 -
drivers/bluetooth/bluecard_cs.c | 7 +--
drivers/bluetooth/hci_bcsp.c | 7 +--
drivers/cdrom/aztcd.c | 10 ++--
drivers/cdrom/cdu31a.c | 5 +-
drivers/cdrom/cm206.c | 11 ++--
drivers/char/drm/via_dmablit.c | 7 +--
drivers/char/dtlk.c | 7 +--
drivers/char/epca.c | 7 +--
drivers/char/genrtc.c | 7 +--
drivers/char/ip2/i2ellis.c | 8 +--
drivers/char/ip2/i2lib.c | 9 +--
drivers/char/ipmi/ipmi_msghandler.c | 4 +-
drivers/char/ipmi/ipmi_si_intf.c | 5 +-
drivers/char/moxa.c | 21 +++-----
drivers/char/mspec.c | 2 -
drivers/char/n_r3964.c | 9 +--
drivers/char/nwbutton.c | 12 ++---
drivers/char/pcmcia/cm4000_cs.c | 7 +--
drivers/char/pcmcia/cm4040_cs.c | 7 +--
drivers/char/pcmcia/synclink_cs.c | 9 +--
drivers/char/rocket.c | 5 +-
drivers/char/rtc.c | 7 +--
drivers/char/scan_keyb.c | 12 +----
drivers/char/specialix.c | 18 ++-----
drivers/char/sx.c | 6 +-
drivers/char/synclink.c | 9 +--
drivers/char/synclink_gt.c | 19 ++-----
drivers/char/synclinkmp.c | 24 +++------
drivers/char/tpm/tpm.c | 8 +--
drivers/char/vt.c | 7 +--
drivers/char/watchdog/alim7101_wdt.c | 8 +--
drivers/char/watchdog/cpu5wdt.c | 6 +-
drivers/char/watchdog/ep93xx_wdt.c | 4 +-
drivers/char/watchdog/machzwd.c | 6 +-
drivers/char/watchdog/mixcomwd.c | 6 +-
drivers/char/watchdog/pcwd.c | 6 +-
drivers/char/watchdog/sbc60xxwdt.c | 8 +--
drivers/char/watchdog/sc520_wdt.c | 8 +--
drivers/char/watchdog/shwdt.c | 6 +-
drivers/char/watchdog/softdog.c | 7 +--
drivers/char/watchdog/w83877f_wdt.c | 8 +--
drivers/fc4/fc.c | 21 ++------
drivers/hwmon/hdaps.c | 5 +-
drivers/i2c/busses/i2c-pnx.c | 7 +--
drivers/ide/ide-io.c | 3 -
drivers/ide/ide-probe.c | 4 --
drivers/ide/legacy/hd.c | 5 +-
drivers/ieee1394/hosts.c | 4 --
drivers/ieee1394/ieee1394_core.c | 3 -
drivers/ieee1394/ieee1394_core.h | 2 -
drivers/infiniband/hw/ehca/ehca_iverbs.h | 2 -
drivers/infiniband/hw/ehca/ehca_main.c | 5 +-
drivers/infiniband/hw/ipath/ipath_init_chip.c | 4 --
drivers/infiniband/hw/ipath/ipath_kernel.h | 2 -
drivers/infiniband/hw/ipath/ipath_stats.c | 3 -
drivers/infiniband/hw/ipath/ipath_verbs.c | 8 +--
drivers/infiniband/hw/mthca/mthca_catas.c | 7 +--
drivers/input/ff-memless.c | 5 +-
drivers/input/gameport/gameport.c | 8 +--
drivers/input/joystick/db9.c | 7 +--
drivers/input/joystick/gamecon.c | 8 +--
drivers/input/joystick/turbografx.c | 7 +--
drivers/input/keyboard/corgikbd.c | 20 +-------
drivers/input/keyboard/locomokbd.c | 13 -----
drivers/input/keyboard/omap-keypad.c | 5 +-
drivers/input/keyboard/spitzkbd.c | 21 +-------
drivers/input/serio/hil_mlc.c | 6 +-
drivers/input/serio/hp_sdc.c | 6 +-
drivers/input/touchscreen/ads7846.c | 7 +--
drivers/input/touchscreen/corgi_ts.c | 8 +--
drivers/isdn/act2000/module.c | 7 +--
drivers/isdn/capi/capidrv.c | 7 +--
drivers/isdn/divert/isdn_divert.c | 11 +---
drivers/isdn/gigaset/bas-gigaset.c | 18 ++-----
drivers/isdn/gigaset/common.c | 5 +-
drivers/isdn/hardware/eicon/divasi.c | 10 +---
drivers/isdn/hisax/amd7930_fn.c | 4 --
drivers/isdn/hisax/arcofi.c | 4 --
drivers/isdn/hisax/diva.c | 4 --
drivers/isdn/hisax/elsa.c | 4 --
drivers/isdn/hisax/fsm.c | 4 --
drivers/isdn/hisax/hfc4s8s_l1.c | 4 --
drivers/isdn/hisax/hfc_2bds0.c | 4 --
drivers/isdn/hisax/hfc_pci.c | 8 +--
drivers/isdn/hisax/hfc_sx.c | 8 +--
drivers/isdn/hisax/hfc_usb.c | 8 +--
drivers/isdn/hisax/hfcscard.c | 4 --
drivers/isdn/hisax/icc.c | 4 --
drivers/isdn/hisax/ipacx.c | 4 --
drivers/isdn/hisax/isac.c | 4 --
drivers/isdn/hisax/isar.c | 8 +--
drivers/isdn/hisax/isdnl3.c | 4 --
drivers/isdn/hisax/saphir.c | 4 --
drivers/isdn/hisax/teleint.c | 4 --
drivers/isdn/hisax/w6692.c | 4 --
drivers/isdn/i4l/isdn_common.c | 6 +-
drivers/isdn/i4l/isdn_net.c | 7 +--
drivers/isdn/i4l/isdn_ppp.c | 11 +---
drivers/isdn/i4l/isdn_tty.c | 7 +--
drivers/isdn/isdnloop/isdnloop.c | 28 +++--------
drivers/isdn/pcbit/drv.c | 11 +---
drivers/isdn/pcbit/layer2.c | 15 ++----
drivers/isdn/sc/command.c | 6 +-
drivers/isdn/sc/interrupt.c | 6 +-
drivers/isdn/sc/timer.c | 64 ++++++++++++------------
drivers/leds/ledtrig-heartbeat.c | 8 +--
drivers/leds/ledtrig-timer.c | 7 +--
drivers/macintosh/smu.c | 10 ++--
drivers/md/md.c | 18 +++----
drivers/media/common/saa7146_fops.c | 3 -
drivers/media/common/saa7146_vbi.c | 11 +---
drivers/media/common/saa7146_video.c | 4 --
drivers/media/dvb/dvb-core/dmxdev.c | 8 +--
drivers/media/radio/radio-cadet.c | 9 +--
drivers/media/video/bt8xx/bttv-driver.c | 7 +--
drivers/media/video/bt8xx/bttv-input.c | 23 ++-------
drivers/media/video/cx88/cx88-input.c | 8 +--
drivers/media/video/cx88/cx88-mpeg.c | 8 +--
drivers/media/video/cx88/cx88-vbi.c | 3 -
drivers/media/video/cx88/cx88-video.c | 11 +---
drivers/media/video/cx88/cx88.h | 2 -
drivers/media/video/ir-kbd-i2c.c | 7 +--
drivers/media/video/pvrusb2/pvrusb2-hdw.c | 7 +--
drivers/media/video/saa6588.c | 8 +--
drivers/media/video/saa7134/saa7134-core.c | 3 -
drivers/media/video/saa7134/saa7134-input.c | 7 +--
drivers/media/video/saa7134/saa7134-ts.c | 4 --
drivers/media/video/saa7134/saa7134-vbi.c | 4 --
drivers/media/video/saa7134/saa7134-video.c | 4 --
drivers/media/video/saa7134/saa7134.h | 2 -
drivers/media/video/tvaudio.c | 7 +--
drivers/media/video/usbvision/usbvision-core.c | 10 +---
drivers/media/video/vivi.c | 7 +--
drivers/message/fusion/mptbase.c | 14 ++---
drivers/message/fusion/mptfc.c | 4 --
drivers/message/fusion/mptsas.c | 4 --
drivers/message/fusion/mptscsih.c | 4 --
drivers/message/fusion/mptscsih.h | 2 -
drivers/message/fusion/mptspi.c | 4 --
drivers/mmc/au1xmmc.c | 8 +--
drivers/mmc/imxmmc.c | 9 +--
drivers/mmc/mmci.c | 7 +--
drivers/mmc/omap.c | 16 ++----
drivers/mmc/sdhci.c | 7 +--
drivers/mmc/wbsd.c | 8 +--
drivers/net/3c515.c | 10 +---
drivers/net/3c59x.c | 18 ++-----
drivers/net/a2065.c | 5 --
drivers/net/amd8111e.c | 4 --
drivers/net/appletalk/cops.c | 10 +---
drivers/net/appletalk/ltpc.c | 8 +--
drivers/net/arm/am79c961a.c | 7 +--
drivers/net/arm/at91_ether.c | 10 +---
drivers/net/arm/ether3.c | 7 +--
drivers/net/atp.c | 9 +--
drivers/net/b44.c | 8 +--
drivers/net/bmac.c | 9 +--
drivers/net/bnx2.c | 7 +--
drivers/net/bonding/bond_main.c | 23 ++++-----
drivers/net/bonding/bond_sysfs.c | 24 +++------
drivers/net/cassini.c | 7 +--
drivers/net/chelsio/sge.c | 23 ++++-----
drivers/net/cris/eth_v10.c | 34 +++++--------
drivers/net/declance.c | 11 ----
drivers/net/dl2k.c | 9 +--
drivers/net/dm9000.c | 9 +--
drivers/net/e100.c | 14 ++---
drivers/net/e1000/e1000_ethtool.c | 14 ++---
drivers/net/e1000/e1000_main.c | 29 +++--------
drivers/net/eepro100.c | 9 +--
drivers/net/epic100.c | 9 +--
drivers/net/eql.c | 7 +--
drivers/net/fealnx.c | 20 ++------
drivers/net/fec_8xx/fec_mii.c | 14 ++---
drivers/net/forcedeth.c | 34 +++----------
drivers/net/hamachi.c | 12 ++---
drivers/net/hamradio/6pack.c | 40 ++++-----------
drivers/net/hamradio/scc.c | 61 +++++++++--------------
drivers/net/hamradio/yam.c | 14 ++---
drivers/net/ibm_emac/ibm_emac_core.c | 7 +--
drivers/net/ioc3-eth.c | 8 +--
drivers/net/irda/irda-usb.c | 8 +--
drivers/net/iseries_veth.c | 19 ++-----
drivers/net/ixgb/ixgb_ethtool.c | 14 ++---
drivers/net/ixgb/ixgb_main.c | 9 +--
drivers/net/ixp2000/enp2611.c | 5 +-
drivers/net/mace.c | 20 ++------
drivers/net/mv643xx_eth.c | 19 -------
drivers/net/myri10ge/myri10ge.c | 9 +--
drivers/net/natsemi.c | 9 +--
drivers/net/netxen/netxen_nic_main.c | 11 +---
drivers/net/ns83820.c | 7 +--
drivers/net/pci-skeleton.c | 9 +--
drivers/net/pcmcia/3c574_cs.c | 9 +--
drivers/net/pcmcia/3c589_cs.c | 9 +--
drivers/net/pcmcia/axnet_cs.c | 9 +--
drivers/net/pcmcia/pcnet_cs.c | 9 +--
drivers/net/pcmcia/smc91c92_cs.c | 9 +--
drivers/net/pcnet32.c | 11 +---
drivers/net/phy/phy.c | 9 +--
drivers/net/qla3xxx.c | 8 +--
drivers/net/r8169.c | 7 +--
drivers/net/rrunner.c | 7 +--
drivers/net/s2io.c | 18 ++-----
drivers/net/s2io.h | 2 -
drivers/net/sb1250-mac.c | 9 +--
drivers/net/shaper.c | 8 +--
drivers/net/sis190.c | 7 +--
drivers/net/sis900.c | 9 +--
drivers/net/sk98lin/skethtool.c | 3 -
drivers/net/sk98lin/skge.c | 6 +-
drivers/net/sky2.c | 5 +-
drivers/net/slip.c | 20 ++------
drivers/net/spider_net.c | 5 --
drivers/net/sunbmac.c | 8 +--
drivers/net/sundance.c | 9 +--
drivers/net/sungem.c | 7 +--
drivers/net/sunhme.c | 9 +--
drivers/net/sunlance.c | 11 ----
drivers/net/tg3.c | 8 +--
drivers/net/tokenring/ibmtr.c | 22 +++-----
drivers/net/tokenring/tms380tr.c | 19 +++----
drivers/net/tsi108_eth.c | 7 +--
drivers/net/tulip/de2104x.c | 16 ++----
drivers/net/tulip/de4x5.c | 16 ++----
drivers/net/tulip/dmfe.c | 9 +--
drivers/net/tulip/interrupt.c | 3 -
drivers/net/tulip/pnic.c | 3 -
drivers/net/tulip/pnic2.c | 3 -
drivers/net/tulip/timer.c | 6 +-
drivers/net/tulip/tulip.h | 12 ++---
drivers/net/tulip/tulip_core.c | 15 ++----
drivers/net/tulip/uli526x.c | 9 +--
drivers/net/tulip/winbond-840.c | 9 +--
drivers/net/ucc_geth.c | 14 ++---
drivers/net/ucc_geth_phy.c | 7 +--
drivers/net/wan/cycx_x25.c | 11 ++--
drivers/net/wan/dscc4.c | 8 +--
drivers/net/wan/hdlc_cisco.c | 9 +--
drivers/net/wan/hdlc_fr.c | 9 +--
drivers/net/wan/lmc/lmc_main.c | 8 +--
drivers/net/wan/sbni.c | 11 +---
drivers/net/wan/sdla.c | 12 +----
drivers/net/wan/syncppp.c | 15 ++----
drivers/net/wireless/arlan-main.c | 7 +--
drivers/net/wireless/atmel.c | 9 +--
drivers/net/wireless/bcm43xx/bcm43xx_leds.c | 5 +-
drivers/net/wireless/hostap/hostap_ap.c | 7 +--
drivers/net/wireless/hostap/hostap_hw.c | 24 +++------
drivers/net/wireless/ray_cs.c | 56 ++++++++-------------
drivers/net/wireless/strip.c | 8 +--
drivers/net/yellowfin.c | 9 +--
drivers/parport/ieee1284.c | 15 +-----
drivers/pci/hotplug/cpqphp.h | 2 -
drivers/pci/hotplug/cpqphp_core.c | 3 -
drivers/pci/hotplug/cpqphp_ctrl.c | 18 +++----
drivers/pci/hotplug/pciehp_ctrl.c | 27 +++++-----
drivers/pci/hotplug/pciehp_hpc.c | 10 +---
drivers/pci/hotplug/shpchp_hpc.c | 14 ++---
drivers/pcmcia/au1000_generic.c | 10 +---
drivers/pcmcia/i82365.c | 6 +-
drivers/pcmcia/m32r_cfc.c | 10 +---
drivers/pcmcia/m32r_pcc.c | 10 +---
drivers/pcmcia/omap_cf.c | 11 ++--
drivers/pcmcia/pd6729.c | 10 +---
drivers/pcmcia/soc_common.c | 10 +---
drivers/pcmcia/tcic.c | 8 +--
drivers/pcmcia/yenta_socket.c | 10 +---
drivers/rtc/rtc-dev.c | 5 +-
drivers/s390/block/dasd.c | 27 +++-------
drivers/s390/char/con3215.c | 7 +--
drivers/s390/char/con3270.c | 18 ++-----
drivers/s390/char/sclp.c | 29 ++++++-----
drivers/s390/char/sclp_con.c | 7 +--
drivers/s390/char/sclp_tty.c | 7 +--
drivers/s390/char/sclp_vt220.c | 6 +-
drivers/s390/char/tape_std.c | 8 +--
drivers/s390/char/tty3270.c | 9 +--
drivers/s390/cio/device_fsm.c | 8 +--
drivers/s390/crypto/ap_bus.c | 6 +-
drivers/s390/net/claw.c | 10 +---
drivers/s390/net/fsm.c | 12 +----
drivers/s390/net/lcs.c | 9 +--
drivers/s390/net/qeth_main.c | 9 +--
drivers/s390/scsi/zfcp_erp.c | 23 +++------
drivers/sbus/char/cpwatchdog.c | 19 ++-----
drivers/scsi/aha152x.c | 7 +--
drivers/scsi/aic94xx/aic94xx_hwi.c | 3 -
drivers/scsi/aic94xx/aic94xx_hwi.h | 2 -
drivers/scsi/aic94xx/aic94xx_scb.c | 3 -
drivers/scsi/aic94xx/aic94xx_tmf.c | 20 +++-----
drivers/scsi/arm/fas216.c | 8 +--
drivers/scsi/dc395x.c | 18 ++-----
drivers/scsi/gdth.c | 6 +-
drivers/scsi/gdth_proc.c | 13 +----
drivers/scsi/ipr.c | 11 ++--
drivers/scsi/libiscsi.c | 6 +-
drivers/scsi/lpfc/lpfc_crtn.h | 12 ++---
drivers/scsi/lpfc/lpfc_ct.c | 3 -
drivers/scsi/lpfc/lpfc_els.c | 10 +---
drivers/scsi/lpfc/lpfc_hbadisc.c | 7 +--
drivers/scsi/lpfc/lpfc_init.c | 29 +++--------
drivers/scsi/lpfc/lpfc_scsi.c | 3 -
drivers/scsi/lpfc/lpfc_sli.c | 4 --
drivers/scsi/megaraid/megaraid_mbox.c | 7 +--
drivers/scsi/megaraid/megaraid_mm.c | 12 +----
drivers/scsi/ncr53c8xx.c | 7 +--
drivers/scsi/pluto.c | 4 +-
drivers/scsi/qla1280.c | 14 ++---
drivers/scsi/qla2xxx/qla_mbx.c | 8 +--
drivers/scsi/qla2xxx/qla_os.c | 8 +--
drivers/scsi/qla4xxx/ql4_os.c | 7 +--
drivers/scsi/scsi.c | 4 +-
drivers/scsi/scsi_debug.c | 15 +-----
drivers/scsi/scsi_error.c | 46 +----------------
drivers/scsi/scsi_priv.h | 2 -
drivers/scsi/sym53c8xx_2/sym_glue.c | 7 +--
drivers/serial/8250.c | 7 +--
drivers/serial/crisv10.c | 5 +-
drivers/serial/imx.c | 7 +--
drivers/serial/m32r_sio.c | 7 +--
drivers/serial/mcfserial.c | 6 +-
drivers/serial/mux.c | 5 +-
drivers/serial/sa1100.c | 18 +++----
drivers/serial/sh-sci.c | 29 +++++------
drivers/serial/sn_console.c | 7 +--
drivers/telephony/ixj.c | 12 ++---
drivers/usb/atm/cxacru.c | 8 +--
drivers/usb/atm/usbatm.c | 8 +--
drivers/usb/core/hcd.c | 8 +--
drivers/usb/gadget/dummy_hcd.c | 7 +--
drivers/usb/gadget/omap_udc.c | 7 +--
drivers/usb/gadget/pxa2xx_udc.c | 8 +--
drivers/usb/gadget/zero.c | 7 +--
drivers/usb/host/ehci-hcd.c | 7 +--
drivers/usb/host/hc_crisv10.c | 20 +++-----
drivers/usb/host/sl811-hcd.c | 8 +--
drivers/usb/host/uhci-hcd.c | 3 -
drivers/usb/host/uhci-q.c | 3 -
drivers/usb/input/hid-core.c | 5 +-
drivers/usb/net/catc.c | 7 +--
drivers/usb/net/usbnet.c | 9 +--
drivers/usb/serial/garmin_gps.c | 8 +--
drivers/video/aty/radeon_base.c | 8 +--
drivers/video/console/fbcon.c | 7 +--
drivers/video/pmag-aa-fb.c | 7 +--
drivers/video/sun3fb.c | 8 +--
fs/aio.c | 8 +--
fs/dlm/recover.c | 7 +--
fs/jbd/journal.c | 7 +--
fs/jbd2/journal.c | 6 +-
fs/ncpfs/inode.c | 5 +-
fs/ncpfs/sock.c | 4 --
fs/ocfs2/cluster/tcp.c | 9 +--
include/asm-alpha/atomic.h | 4 +-
include/asm-ia64/atomic.h | 4 +-
include/asm-ia64/sn/bte.h | 3 +
include/linux/ide.h | 2 -
include/linux/ncp_fs_sb.h | 2 -
include/linux/timer.h | 41 ++++++++++++---
include/media/saa7146_vv.h | 2 -
include/net/ieee80211_crypt.h | 2 -
include/net/inet_connection_sock.h | 6 +-
include/net/llc_c_ac.h | 8 ++-
include/net/sctp/sm.h | 6 +-
kernel/acct.c | 5 +-
kernel/timer.c | 7 ++-
kernel/workqueue.c | 12 ++---
mm/page-writeback.c | 12 ++---
mm/slob.c | 7 +--
net/802/tr.c | 10 ++--
net/appletalk/aarp.c | 6 +-
net/appletalk/ddp.c | 8 +--
net/atm/clip.c | 4 +-
net/atm/lec.c | 21 +++-----
net/ax25/af_ax25.c | 11 +---
net/ax25/ax25_ds_timer.c | 8 +--
net/ax25/ax25_timer.c | 44 ++++++-----------
net/bluetooth/hci_conn.c | 15 ++----
net/bluetooth/hidp/core.c | 9 +--
net/bluetooth/l2cap.c | 8 +--
net/bluetooth/rfcomm/core.c | 8 +--
net/bluetooth/sco.c | 8 +--
net/bridge/br_fdb.c | 3 -
net/bridge/br_private.h | 2 -
net/bridge/br_stp_timer.c | 47 +++++-------------
net/core/flow.c | 5 +-
net/core/neighbour.c | 23 +++------
net/dccp/ccids/ccid2.c | 7 +--
net/dccp/ccids/ccid3.c | 10 +---
net/dccp/output.c | 8 +--
net/dccp/timer.c | 14 ++---
net/decnet/dn_dev.c | 11 +---
net/decnet/dn_route.c | 5 +-
net/decnet/dn_timer.c | 8 +--
net/econet/af_econet.c | 12 ++---
net/ieee80211/ieee80211_crypt.c | 3 -
net/ieee80211/ieee80211_module.c | 5 +-
net/ipv4/igmp.c | 23 ++-------
net/ipv4/inet_connection_sock.c | 20 ++------
net/ipv4/ip_fragment.c | 13 ++---
net/ipv4/ipmr.c | 5 +-
net/ipv4/ipvs/ip_vs_conn.c | 8 +--
net/ipv4/ipvs/ip_vs_est.c | 5 +-
net/ipv4/ipvs/ip_vs_lblc.c | 9 +--
net/ipv4/ipvs/ip_vs_lblcr.c | 9 +--
net/ipv4/netfilter/ip_conntrack_core.c | 18 ++-----
net/ipv4/netfilter/ipt_ULOG.c | 25 +++++----
net/ipv4/route.c | 17 +++---
net/ipv4/tcp_timer.c | 19 +++----
net/ipv6/addrconf.c | 26 ++++------
net/ipv6/mcast.c | 30 +++--------
net/ipv6/netfilter/nf_conntrack_reasm.c | 13 ++---
net/ipv6/reassembly.c | 12 ++---
net/irda/af_irda.c | 10 +---
net/irda/irttp.c | 10 +---
net/lapb/lapb_timer.c | 18 ++-----
net/llc/llc_c_ac.c | 19 +++----
net/llc/llc_conn.c | 16 ++----
net/llc/llc_station.c | 6 +-
net/netfilter/nf_conntrack_core.c | 12 ++---
net/netfilter/nf_conntrack_expect.c | 8 +--
net/netfilter/nfnetlink_log.c | 10 +---
net/netfilter/xt_hashlimit.c | 10 +---
net/netrom/af_netrom.c | 5 +-
net/netrom/nr_timer.c | 48 ++++++------------
net/rose/af_rose.c | 17 ------
net/rose/rose_link.c | 16 ++----
net/rose/rose_loopback.c | 7 +--
net/rose/rose_timer.c | 34 +++++--------
net/rxrpc/call.c | 25 ++-------
net/sched/sch_api.c | 11 ++--
net/sched/sch_cbq.c | 15 ++----
net/sched/sch_generic.c | 8 +--
net/sched/sch_hfsc.c | 8 +--
net/sched/sch_htb.c | 14 ++---
net/sched/sch_netem.c | 8 +--
net/sched/sch_sfq.c | 7 +--
net/sched/sch_tbf.c | 8 +--
net/sctp/associola.c | 7 +--
net/sctp/sm_sideeffect.c | 27 +++-------
net/sctp/transport.c | 8 +--
net/sunrpc/sched.c | 4 --
net/sunrpc/svcsock.c | 5 +-
net/sunrpc/xprt.c | 8 +--
net/wanrouter/af_wanpipe.c | 27 +++-------
net/x25/af_x25.c | 16 ------
net/x25/x25_link.c | 10 +---
net/x25/x25_timer.c | 19 ++-----
net/xfrm/xfrm_policy.c | 7 +--
net/xfrm/xfrm_state.c | 17 ++----
sound/core/pcm.c | 10 ----
sound/core/timer.c | 7 +--
sound/drivers/dummy.c | 7 +--
sound/drivers/mpu401/mpu401_uart.c | 7 +--
sound/drivers/mtpav.c | 7 +--
sound/drivers/opl3/opl3_midi.c | 3 -
sound/drivers/opl3/opl3_seq.c | 4 --
sound/drivers/opl3/opl3_voice.h | 2 -
sound/drivers/serial-u16550.c | 9 +--
sound/i2c/other/ak4117.c | 10 +---
sound/isa/sb/emu8000_pcm.c | 7 +--
sound/isa/sb/sb8_midi.c | 8 +--
sound/isa/wavefront/wavefront_midi.c | 9 ++-
sound/oss/trident.c | 6 +-
sound/oss/waveartist.c | 11 ++--
sound/pci/echoaudio/midi.c | 9 +--
sound/pci/korg1212/korg1212.c | 7 +--
sound/pci/rme9652/hdsp.c | 7 +--
sound/pci/rme9652/hdspm.c | 7 +--
sound/synth/emux/emux.c | 4 --
sound/synth/emux/emux_synth.c | 3 -
sound/synth/emux/emux_voice.h | 2 -
sound/usb/usbmidi.c | 7 +--
524 files changed, 1700 insertions(+), 3605 deletions(-)

2007-01-04 19:24:09

by Mikael Pettersson

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 04 Jan 2007 11:51:10 -0600, Eric Sandeen wrote:
>Also - is it ok to alias a function with one signature to a function with
>another signature?

NO!

Specific cases of type mismatches may work on many machines
(say different pointer types as long as they aren't accessed),
but in general this won't work since types affect calling conventions.
Abusing aliasing like this is exactly like casting a function
pointer to a different type that it actually has, and then invoking
it at that type: all bets are off.

Don't do this. Ever.

/Mikael

2007-01-04 19:30:39

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Al Viro wrote:
>
> How about "makes call graph analysis easier"? ;-) In principle, I have
> no problem with force-casting, but it'd better be cast to the right
> type...

Do we really care in the kernel? We simply never use function pointer
casts like this for anything non-trivial, so if the graph analysis just
doesn't work for those cases, do we really even care?

The only case I can _remember_ us doing this for is literally the
error-returning functions, where the call graph finding them really
doesn't matter, I think.

So I don't _object_ to that reason, I just wonder whether it's really a
big issue..

Linus

2007-01-04 19:32:28

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Al Viro wrote:
>
> PS: what would be the sane strategy for timer series merge, BTW?

I think Andrew may care more. I just care about it happening after 2.6.20.

Whether we can do it really early after that, or whether we should do it
as the last thing just before releasing -rc1 (to avoid having other people
have to fix up conflicts during the merge window), who knows..

Linus

2007-01-04 20:24:17

by Al Viro

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, Jan 04, 2007 at 11:30:22AM -0800, Linus Torvalds wrote:
>
>
> On Thu, 4 Jan 2007, Al Viro wrote:
> >
> > How about "makes call graph analysis easier"? ;-) In principle, I have
> > no problem with force-casting, but it'd better be cast to the right
> > type...
>
> Do we really care in the kernel? We simply never use function pointer
> casts like this for anything non-trivial, so if the graph analysis just
> doesn't work for those cases, do we really even care?

Umm... Let me put it that way - amount of things that can be done to
void * is much more than what can be done to function pointers. So
keeping track of them gets easier if we never do casts to/from void *.
What's more, very few places in the kernel try to do that _and_ most
of those that do are simply too lazy to declare local variable with
the right type. bad_inode.c covers most of what remains.

IMO we ought to start checking for that kind of stuff; note that we _still_
have strugglers from pt_regs removal where interrupt handler still takes
3 arguments, but we don't notice since argument of request_irq() is cast
to void * ;-/

That's local stuff; however, when trying to do non-local work (e.g. deduce
that foo() may be called from BH, bar() is always called from process
context, etc. _without_ fuckloads of annotations all over the place), the
ban on mixing void * with function pointers helps a _lot_.

So my main issue with fs/bad_inode.c is not even cast per se; it's that
cast is to void *.

2007-01-04 21:00:42

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 4 Jan 2007 20:24:12 +0000
Al Viro <[email protected]> wrote:

> So my main issue with fs/bad_inode.c is not even cast per se; it's that
> cast is to void *.

But Eric's latest patch is OK in that regard, isn't it? It might confuse
parsers (in fixable ways), but it is type-correct and has no casts. (Well,
it kinda has an link-time cast).

2007-01-04 21:04:25

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Andrew Morton wrote:
> On Thu, 4 Jan 2007 20:24:12 +0000
> Al Viro <[email protected]> wrote:
>
>> So my main issue with fs/bad_inode.c is not even cast per se; it's that
>> cast is to void *.
>
> But Eric's latest patch is OK in that regard, isn't it? It might confuse
> parsers (in fixable ways), but it is type-correct and has no casts. (Well,
> it kinda has an link-time cast).

Even if it is, I'm starting to wonder if all this tricksiness is really
worth it for 400 bytes or so. :)

-Eric

2007-01-04 21:10:29

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 04 Jan 2007 15:04:17 -0600
Eric Sandeen <[email protected]> wrote:

> Andrew Morton wrote:
> > On Thu, 4 Jan 2007 20:24:12 +0000
> > Al Viro <[email protected]> wrote:
> >
> >> So my main issue with fs/bad_inode.c is not even cast per se; it's that
> >> cast is to void *.
> >
> > But Eric's latest patch is OK in that regard, isn't it? It might confuse
> > parsers (in fixable ways), but it is type-correct and has no casts. (Well,
> > it kinda has an link-time cast).
>
> Even if it is, I'm starting to wonder if all this tricksiness is really
> worth it for 400 bytes or so. :)
>

Ah, but it's a learning opportunity!

btw, couldn't we fix this bug with a simple old

--- a/fs/bad_inode.c~a
+++ a/fs/bad_inode.c
@@ -15,7 +15,7 @@
#include <linux/smp_lock.h>
#include <linux/namei.h>

-static int return_EIO(void)
+static long return_EIO(void)
{
return -EIO;
}
_

?

2007-01-04 21:18:23

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Andrew Morton wrote:
> On Thu, 04 Jan 2007 15:04:17 -0600
> Eric Sandeen <[email protected]> wrote:
>
>> Andrew Morton wrote:
>>> On Thu, 4 Jan 2007 20:24:12 +0000
>>> Al Viro <[email protected]> wrote:
>>>
>>>> So my main issue with fs/bad_inode.c is not even cast per se; it's that
>>>> cast is to void *.
>>> But Eric's latest patch is OK in that regard, isn't it? It might confuse
>>> parsers (in fixable ways), but it is type-correct and has no casts. (Well,
>>> it kinda has an link-time cast).
>> Even if it is, I'm starting to wonder if all this tricksiness is really
>> worth it for 400 bytes or so. :)
>>
>
> Ah, but it's a learning opportunity!

*grin*

> btw, couldn't we fix this bug with a simple old
>
> --- a/fs/bad_inode.c~a
> +++ a/fs/bad_inode.c
> @@ -15,7 +15,7 @@
> #include <linux/smp_lock.h>
> #include <linux/namei.h>
>
> -static int return_EIO(void)
> +static long return_EIO(void)
> {
> return -EIO;
> }
> _
>
> ?

What about ops that return loff_t (64 bits) on 32-bit arches and stuff
it into 2 registers....

#include <stdio.h>
#include <sys/types.h>
#include <sys/errno.h>

static long return_EIO(void)
{
return -EIO;
}

#define EIO_ERROR ((void *) (return_EIO))

int main(int argc, char ** argv)
{
loff_t error;
loff_t (*fn_ptr) (int);

fn_ptr = EIO_ERROR;

error = fn_ptr(0);
printf("and... error is %lld\n", error);
return 0;
}

[root]# ./ssize_eio
and... error is 8589934587
[root]# uname -m
i686

I'm still not convinced that this is the best place to be clever :)

-Eric

2007-01-04 21:31:04

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Eric Sandeen wrote:
> Andrew Morton wrote:
>
> > btw, couldn't we fix this bug with a simple old
> >
> > --- a/fs/bad_inode.c~a
> > +++ a/fs/bad_inode.c
> > @@ -15,7 +15,7 @@
> > #include <linux/smp_lock.h>
> > #include <linux/namei.h>
> >
> > -static int return_EIO(void)
> > +static long return_EIO(void)
> > {
> > return -EIO;
> > }
> > _
> >
> > ?
>
> What about ops that return loff_t (64 bits) on 32-bit arches and stuff
> it into 2 registers....

Do we actually have cases where we cast to a different return value?

I'll happily cast away arguments that aren't used, but I'm not sure that
we ever should cast different return values (not "int" vs "long", but also
not "loff_t" etc).

On 32-bit architectures, 64-bit entities may be returned totally different
ways (ie things like "caller allocates space for them and passes in a
magic pointer to the return value as the first _real_ argument").

So with my previous email, I was definitely _not_ trying to say that
casting function pointers is ok. In practice it is ok when the _arguments_
differ, but not necessarily when the _return-type_ differs.

I was cc'd into the discussion late, so I didn't realize that we
apparently already have a situation where changing the return value to
"long" might make a difference. If so, I agree that we shouldn't do this
at all (although Andrew's change to "long" seems perfectly fine as a "make
old cases continue to work" patch if it actually matters).

Linus

2007-01-04 21:50:35

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Linus Torvalds wrote:
>
> On Thu, 4 Jan 2007, Eric Sandeen wrote:
>> Andrew Morton wrote:
>>
>>> btw, couldn't we fix this bug with a simple old
>>>
>>> --- a/fs/bad_inode.c~a
>>> +++ a/fs/bad_inode.c
>>> @@ -15,7 +15,7 @@
>>> #include <linux/smp_lock.h>
>>> #include <linux/namei.h>
>>>
>>> -static int return_EIO(void)
>>> +static long return_EIO(void)
>>> {
>>> return -EIO;
>>> }
>>> _
>>>
>>> ?
>> What about ops that return loff_t (64 bits) on 32-bit arches and stuff
>> it into 2 registers....
>
> Do we actually have cases where we cast to a different return value?

Today, via the void * function casts in the bad file/inode ops, in
effect yes.

static int return_EIO(void)
{
return -EIO;
}

#define EIO_ERROR ((void *) (return_EIO))

...
.listxattr = EIO_ERROR,

but listxattr is supposed to return a ssize_t, which is 64 bits on some
platforms, and only 32 bits get filled in thanks to the (void *) cast.
So we wind up with something other than the return value we expect...

Andrew's long suggestion breaks things the other way, with 64-bit
returning ops on 32-bit arches which again only pick up the first 32
bits thanks to the (void *) cast.

If we're really happy with casting away the function arguments (which
are not -used- in the bad_foo ops anyway), then I'd maybe suggest going
back to my first try at this thing:

static int return_EIO_int(void)
{
return -EIO;
}
#define EIO_ERROR_INT ((void *) (return_EIO_int))

static struct inode_operations bad_inode_ops =
{
.create = EIO_ERROR_INT,
...etc...

which is most like what we have today, except with specific return types.

-Eric

2007-01-04 21:52:12

by Al Viro

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, Jan 04, 2007 at 01:30:47PM -0800, Linus Torvalds wrote:

> I'll happily cast away arguments that aren't used, but I'm not sure that
> we ever should cast different return values (not "int" vs "long", but also
> not "loff_t" etc).
>
> On 32-bit architectures, 64-bit entities may be returned totally different
> ways (ie things like "caller allocates space for them and passes in a
> magic pointer to the return value as the first _real_ argument").
>
> So with my previous email, I was definitely _not_ trying to say that
> casting function pointers is ok. In practice it is ok when the _arguments_
> differ, but not necessarily when the _return-type_ differs.
>
> I was cc'd into the discussion late, so I didn't realize that we
> apparently already have a situation where changing the return value to
> "long" might make a difference. If so, I agree that we shouldn't do this
> at all (although Andrew's change to "long" seems perfectly fine as a "make
> old cases continue to work" patch if it actually matters).

We do.
loff_t (*llseek) (struct file *, loff_t, int);
...
int (*readdir) (struct file *, void *, filldir_t);

static const struct file_operations bad_file_ops =
{
.llseek = EIO_ERROR,
...
.readdir = EIO_ERROR,


Moreover, we have int, loff_t, ssize_t and long, plus the unsigned variants.
At least 3 versions, unless you want to mess with ifdefs to reduce them to
two.

2007-01-04 22:18:29

by Mitchell Blank Jr

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Al Viro wrote:
> At least 3 versions, unless you want to mess with ifdefs to reduce them to
> two.

I don't think you need to do fancy #ifdef's:

static s32 return_eio_32(void) { return -EIO; }
static s64 return_eio_64(void) { return -EIO; }
extern void return_eio_unknown(void); /* Doesn't exist */
#define return_eio(type) ((sizeof(type) == 4) \
? ((void *) return_eio_32) \
: ((sizeof(type) == 8) \
? ((void *) return_eio_64) \
: ((void *) return_eio_unknown)))

-Mitch

2007-01-04 22:35:44

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Mitchell Blank Jr wrote:
>
> I don't think you need to do fancy #ifdef's:
>
> static s32 return_eio_32(void) { return -EIO; }
> static s64 return_eio_64(void) { return -EIO; }
> extern void return_eio_unknown(void); /* Doesn't exist */
> #define return_eio(type) ((sizeof(type) == 4) \
> ? ((void *) return_eio_32) \
> : ((sizeof(type) == 8) \
> ? ((void *) return_eio_64) \
> : ((void *) return_eio_unknown)))

Well, that probably would work, but it's also true that returning a 64-bit
value on a 32-bit platform really _does_ depend on more than the size.

For an example of this, try compiling this:

long long a(void)
{
return -1;
}

struct dummy { int a, b };

struct dummy b(void)
{
struct dummy retval = { -1 , -1 };
return retval;
}

on x86.

Now, I don't think we actually have anything like this in the kernel, and
your example is likely to work very well in practice, but once we start
doing tricks like this, I actually think it's getting easier to just say
"let's not play tricks with function types at all".

Anybody want to send in the patch that just generates separate versions
for

loff_t eio_llseek(struct file *file, loff_t offset, int origin)
{
return -EIO;
}

int eio_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
return -EIO;
..

and so on?

Linus

2007-01-04 22:48:36

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Linus Torvalds wrote:
> Anybody want to send in the patch that just generates separate versions
> for
>
> loff_t eio_llseek(struct file *file, loff_t offset, int origin)
> {
> return -EIO;
> }
>
> int eio_readdir(struct file *filp, void *dirent, filldir_t filldir)
> {
> return -EIO;
> ..
>
> and so on?

That's more or less what I sent at http://lkml.org/lkml/2007/1/3/244

+static int bad_file_readdir(struct file * filp, void * dirent,
+ filldir_t filldir)
+{
+ return -EIO;
+}

... etc

Thanks,
-Eric

2007-01-04 23:00:30

by Mitchell Blank Jr

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Linus Torvalds wrote:
> Well, that probably would work, but it's also true that returning a 64-bit
> value on a 32-bit platform really _does_ depend on more than the size.

Yeah, obviously this is restricted to the signed-integer case. My point
was just that you could have the compiler figure out which variant to pick
for loff_t automatically.

> "let's not play tricks with function types at all".

I think I agree. The real (but harder) fix for the wasted space issue
would be to get the toolchain to automatically combine functions that
end up compiling into identical assembly.

-Mitch

2007-01-04 23:07:11

by Andrew Morton

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 4 Jan 2007 14:35:09 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

> Anybody want to send in the patch that just generates separate versions
> for
>
> loff_t eio_llseek(struct file *file, loff_t offset, int origin)
> {
> return -EIO;
> }
>
> int eio_readdir(struct file *filp, void *dirent, filldir_t filldir)
> {
> return -EIO;
> ..
>

That's what I currently have queued. It increases bad_inode.o text from
200-odd bytes to 700-odd :(

<looks at sys_ni_syscall>

2007-01-04 23:17:57

by Linus Torvalds

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values



On Thu, 4 Jan 2007, Andrew Morton wrote:
>
> That's what I currently have queued. It increases bad_inode.o text from
> 200-odd bytes to 700-odd :(

Then I think we're ok. We do care about bytes, but we care more about
bytes that actually ever hit the icache or dcache, and this will
effectively do neither.

> <looks at sys_ni_syscall>

That one should be ok. System calls by definition have the same return
type, since they all have the same call site. So that one really does fit
the "argument types differ, but the return type is the same" case.

Linus

2007-01-04 23:29:09

by Eric Sandeen

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Linus Torvalds wrote:

> On Thu, 4 Jan 2007, Andrew Morton wrote:
>
>> That's what I currently have queued. It increases bad_inode.o text from
>> 200-odd bytes to 700-odd :(
>>
> Then I think we're ok. We do care about bytes, but we care more about
> bytes that actually ever hit the icache or dcache, and this will
> effectively do neither.
>
>
Oh good. Resolution? ;-)

Andrew, if you stick with what you've got, you might want this on top of it.

Mostly cosmetic, making placement of * for pointers consistently " *foo"
not "* foo," (was a mishmash before, from cut-n-paste), but also making
.poll return POLLERR.

Thanks,
-Eric

Signed-off-by: Eric Sandeen <[email protected]>

Index: linux-2.6.19/fs/bad_inode.c
===================================================================
--- linux-2.6.19.orig/fs/bad_inode.c
+++ linux-2.6.19/fs/bad_inode.c
@@ -46,18 +46,17 @@ static ssize_t bad_file_aio_write(struct
return -EIO;
}

-static int bad_file_readdir(struct file * filp, void * dirent,
- filldir_t filldir)
+static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
return -EIO;
}

static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
{
- return -EIO;
+ return POLLERR;
}

-static int bad_file_ioctl (struct inode * inode, struct file * filp,
+static int bad_file_ioctl (struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
return -EIO;
@@ -75,12 +74,12 @@ static long bad_file_compat_ioctl(struct
return -EIO;
}

-static int bad_file_mmap(struct file * file, struct vm_area_struct * vma)
+static int bad_file_mmap(struct file *file, struct vm_area_struct *vma)
{
return -EIO;
}

-static int bad_file_open(struct inode * inode, struct file * filp)
+static int bad_file_open(struct inode *inode, struct file *filp)
{
return -EIO;
}
@@ -90,12 +89,12 @@ static int bad_file_flush(struct file *f
return -EIO;
}

-static int bad_file_release(struct inode * inode, struct file * filp)
+static int bad_file_release(struct inode *inode, struct file *filp)
{
return -EIO;
}

-static int bad_file_fsync(struct file * file, struct dentry *dentry,
+static int bad_file_fsync(struct file *file, struct dentry *dentry,
int datasync)
{
return -EIO;
@@ -140,7 +139,7 @@ static int bad_file_check_flags(int flag
return -EIO;
}

-static int bad_file_dir_notify(struct file * file, unsigned long arg)
+static int bad_file_dir_notify(struct file *file, unsigned long arg)
{
return -EIO;
}
@@ -194,54 +193,54 @@ static const struct file_operations bad_
.splice_read = bad_file_splice_read,
};

-static int bad_inode_create (struct inode * dir, struct dentry * dentry,
+static int bad_inode_create (struct inode *dir, struct dentry *dentry,
int mode, struct nameidata *nd)
{
return -EIO;
}

-static struct dentry *bad_inode_lookup(struct inode * dir,
+static struct dentry *bad_inode_lookup(struct inode *dir,
struct dentry *dentry, struct nameidata *nd)
{
return ERR_PTR(-EIO);
}

-static int bad_inode_link (struct dentry * old_dentry, struct inode * dir,
+static int bad_inode_link (struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
return -EIO;
}

-static int bad_inode_unlink(struct inode * dir, struct dentry *dentry)
+static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
{
return -EIO;
}

-static int bad_inode_symlink (struct inode * dir, struct dentry *dentry,
- const char * symname)
+static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
+ const char *symname)
{
return -EIO;
}

-static int bad_inode_mkdir(struct inode * dir, struct dentry * dentry,
+static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
int mode)
{
return -EIO;
}

-static int bad_inode_rmdir (struct inode * dir, struct dentry *dentry)
+static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
{
return -EIO;
}

-static int bad_inode_mknod (struct inode * dir, struct dentry *dentry,
+static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
int mode, dev_t rdev)
{
return -EIO;
}

-static int bad_inode_rename (struct inode * old_dir, struct dentry *old_dentry,
- struct inode * new_dir, struct dentry *new_dentry)
+static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
{
return -EIO;
}
@@ -337,7 +336,7 @@ static struct inode_operations bad_inode
* on it to fail from this point on.
*/

-void make_bad_inode(struct inode * inode)
+void make_bad_inode(struct inode *inode)
{
remove_inode_hash(inode);

@@ -362,7 +361,7 @@ EXPORT_SYMBOL(make_bad_inode);
* Returns true if the inode in question has been marked as bad.
*/

-int is_bad_inode(struct inode * inode)
+int is_bad_inode(struct inode *inode)
{
return (inode->i_op == &bad_inode_ops);
}


2007-01-04 23:52:30

by Al Viro

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, Jan 04, 2007 at 03:21:06PM -0800, Mitchell Blank Jr wrote:
> Linus Torvalds wrote:
> > Well, that probably would work, but it's also true that returning a 64-bit
> > value on a 32-bit platform really _does_ depend on more than the size.
>
> Yeah, obviously this is restricted to the signed-integer case. My point
> was just that you could have the compiler figure out which variant to pick
> for loff_t automatically.
>
> > "let's not play tricks with function types at all".
>
> I think I agree. The real (but harder) fix for the wasted space issue
> would be to get the toolchain to automatically combine functions that
> end up compiling into identical assembly.

Can't do.

int f(void)
{
return 0;
}

int g(void)
{
return 0;
}

int is_f(int (*p)(void))
{
return p == f;
}

main()
{
printf("%d %d\n", is_f(f), is_f(g));
}

would better produce
1 0
for anything resembling a sane C compiler. Comparing pointers to
functions for equality is a well-defined operation and it's not
to be messed with.

You _can_ compile g into jump to f, but that's it. And that, AFAICS,
is what gcc does.

2007-01-05 05:39:16

by Mitchell Blank Jr

[permalink] [raw]
Subject: Duplicated functions (was: fix memory corruption from misinterpreted bad_inode_ops return values)

Al Viro wrote:
> On Thu, Jan 04, 2007 at 03:21:06PM -0800, Mitchell Blank Jr wrote:
> > The real (but harder) fix for the wasted space issue
> > would be to get the toolchain to automatically combine functions that
> > end up compiling into identical assembly.
>
> Can't do.
[...]
> Comparing pointers to
> functions for equality is a well-defined operation and it's not
> to be messed with.

I agree -- it definitely could never be an "always on" optimization. What I
was postulating would be a special post-processing step that could be run by
people interested in squeezing the kernel as small as possible. I'd bet
that there's not many functions in the kernel that rely on having a unique
address. If some exist they could be put in their own ELF section via
a "__unique_function_address_needed" macro or something.

> You _can_ compile g into jump to f, but that's it.

Actually if you don't mind sacrificing alignment (which we do anyway with -Os,
I think) you could give a function two unique addresses just by prepending it
with a single-byte noop instruction.

But anyway... how about some actual data?

Just out of curiosity I threw together a quick script (attached) that
parses the output of "objdump --disassemble vmlinux" and tries to find
functions that share the same opcodes. The short summary of the results:
a bit of .text savings but not an amazing amount -- on my tiny nfsroot
test kernel it found 4034 bytes worth of possible savings (out of a total
of 1460830 bytes of opcodes in .text) About what I expected, actually.

One line in its output really stands out, however:

| 218 bytes: __copy_from_user_ll_nozero, __copy_from_user_ll, __copy_to_user_ll

Those three big functions are exactly the same, which actually makes sense
if you think about how they're implemented. That's a bunch of *very* hot
Icache lines we're wasting there!

Anyway, hopefully some folks will find the script interesting. In particular
maybe the janitors can play with it. I'd wager some of the things it
turns up are good candidates for code re-use.

-Mitch

Script's output:

Duplicated functions:
98 bytes: nlmsvc_proc_granted, nlm4svc_proc_granted
10 bytes: return_EIO, hung_up_tty_write
10 bytes: free_pid_ns, mempool_kfree, free_bd_holder, nfs4_free_open_state, ipc_immediate_free, dir_release, pci_release_bus_bridge_dev, put_tty_driver, device_create_release, class_create_release, class_device_create_release, isa_dev_release, neigh_parms_destroy, unx_destroy_cred, pmap_map_release, rpc_free_iostats, free
15 bytes: xor_64, xor_64
7 bytes: native_set_pte, native_set_pmd, debugfs_u32_set
24 bytes: part_uevent_store, store_uevent
54 bytes: atm_init_aal34, atm_init_aal5
22 bytes: nfs4_encode_void, nlmsvc_encode_void, nlm4svc_encode_void
42 bytes: seq_release_private, content_release
10 bytes: profile_event_register, profile_event_unregister, nocrypt, nocrypt_iv
7 bytes: nfs_proc_commit_setup, udp_lib_hash, udp_lib_hash
179 bytes: nlmsvc_proc_sm_notify, nlm4svc_proc_sm_notify
29 bytes: nfs4_decode_void, nlmsvc_decode_void, nlm4svc_decode_void
62 bytes: nfs3_commit_done, nfs3_write_done
7 bytes: print_trace_stack, i8237A_suspend, store, module_frob_arch_sections, get_gate_vma, in_gate_area, in_gate_area_no_task, no_blink, noop_ret, no_action, next_online_pgdat, __pud_alloc, __pmd_alloc, generic_pipe_buf_pin, simple_sync_file, nfs4_callback_null, fuse_prepare_write, default_read_file, dummycon_dummy, vgacon_dummy, read_null, hung_up_tty_read, con_chars_in_buffer, class_device_create_uevent, anon_transport_dummy_function, ramdisk_writepages, sock_no_poll, noop_dequeue, ipv4_dst_check, rpcproc_encode_null, rpcproc_decode_null, pvc_shutdown, svc_shutdown
20 bytes: fn_show_ptregs, sysrq_handle_showregs
29 bytes: part_next, diskstats_next
10 bytes: atomic_notifier_call_chain, raw_notifier_call_chain
107 bytes: nlm_decode_cookie, nlm4_decode_cookie
5 bytes: do_spurious_interrupt_bug, c_stop, machine_shutdown, machine_halt, syscall_vma_close, native_nop, sighand_ctor, r_stop, s_stop, audit_log_task_context, noop, default_unplug_io_fn, frag_stop, single_stop, t_stop, devinfo_stop, int_seq_stop, parse_solaris_x86, parse_freebsd, parse_netbsd, parse_openbsd, parse_unixware, parse_minix, nfs_server_list_stop, nfs_volume_list_stop, nfs3_forget_cached_acls, fuse_read_inode, ipc_lock_by_ptr, ipc_unlock, crypto_exit_cipher_ops, crypto_exit_digest_ops, ioport_unmap, pci_remove_legacy_files, k_ignore, con_throttle, nv_vlan_rx_kill_vid, input_devices_seq_stop, input_handlers_seq_stop, proto_seq_stop, dev_seq_stop, softnet_seq_stop, dev_mc_seq_stop, neigh_stat_seq_stop, netlink_seq_stop, rt_cpu_seq_stop, tcp_twsk_destructor, raw_seq_stop, udp_seq_stop, icmp_address, icmp_discard, fib_seq_stop, unix_seq_stop, packet_seq_stop, rpc_default_callback, nul_destroy, nul_destroy_cred, c_stop, vcc_seq_stop, smp_setup_processor_id, remapped_pgdat_init, pre_setup_arch_hook, trap_init_hook, sched_init_smp, push_node_boundaries, page_alloc_init
16 bytes: svc_proc_unregister, rpc_proc_unregister
70 bytes: nfs_execute_read, nfs_execute_write
15 bytes: diskstats_stop, part_stop
10 bytes: machine_restart, emergency_restart
26 bytes: sysfs_put_link, fuse_put_link
81 bytes: nlmsvc_decode_notify, nlm4svc_decode_notify
68 bytes: svcauth_unix_release, svcauth_null_release
15 bytes: nr_free_buffer_pages, nr_free_pagecache_pages
20 bytes: ip_map_alloc, rsi_alloc, rsc_alloc
182 bytes: nlmsvc_retrieve_args, nlm4svc_retrieve_args
10 bytes: positive_have_wrcomb, compare_single, simple_delete_dentry, eventpollfs_delete_dentry, proc_delete_dentry, always_on, nul_match, hrtimer_cpu_notify
44 bytes: notesize, notesize
10 bytes: fn_show_mem, sysrq_handle_showmem
10 bytes: unx_lookup_cred, gss_lookup_cred
48 bytes: quirk_pcie_aspm_read, pci_read
20 bytes: open_kcore, open_port
29 bytes: part_attr_show, class_device_attr_show
69 bytes: diskstats_start, part_start
13 bytes: fn_show_state, sysrq_handle_showstate
16 bytes: nfs_proc_lock, nfs3_proc_lock
7 bytes: exact_match, default_write_file, write_null
12 bytes: dst_discard_out, dst_discard_in
55 bytes: nlmsvc_proc_granted_res, nlm4svc_proc_granted_res
10 bytes: do_posix_clock_nosettime, process_cpu_nsleep_restart, thread_cpu_nsleep, thread_cpu_nsleep_restart
23 bytes: kfifo_free, bio_free_map_data
34 bytes: part_attr_store, class_device_attr_store
16 bytes: init_once, fuse_inode_init_once, init_once
15 bytes: swap_io_context, u32_swap
33 bytes: __anon_vma_link, anon_vma_link
218 bytes: __copy_from_user_ll_nozero, __copy_from_user_ll, __copy_to_user_ll
10 bytes: udp_lib_close, udp_lib_close
10 bytes: neigh_seq_stop, qdisc_unlock_tree, icmp_xmit_unlock
11 bytes: pipefs_delete_dentry, sockfs_delete_dentry
155 bytes: rtattr_strlcpy, nla_strlcpy
33 bytes: shmem_dir_map, shmem_swp_map
62 bytes: cap_inode_removexattr, cap_inode_setxattr
51 bytes: nlmsvc_callback_exit, nlm4svc_callback_exit
13 bytes: part_release, sk_filter_rcu_free, sk_filter_rcu_free
15 bytes: nul_refresh, unx_refresh
13 bytes: put_bus, put_driver
16 bytes: native_write_idt_entry, native_write_ldt_entry, native_write_gdt_entry
10 bytes: nlmclnt_rpc_release, nlmsvc_callback_release, nlm4svc_callback_release
24 bytes: svcauth_unix_domain_release, svcauth_gss_domain_release
31 bytes: nlmsvc_proc_null, nlm4svc_proc_null
35 bytes: nfs_wait_schedule, nfs_wait_bit_interruptible, nfs4_wait_bit_interruptible, rpc_wait_bit_interruptible
15 bytes: crypto_init_digest_flags, crypto_init_compress_flags
10 bytes: platform_drv_probe_fail, sock_no_open
25 bytes: cmp_ex, cmp_node_active_region
74 bytes: simple_get_bytes, simple_get_bytes
10 bytes: bad_pipe_r, bad_pipe_w
95 bytes: nlmsvc_decode_reboot, nlm4svc_decode_reboot
10 bytes: do_posix_clock_nonanosleep, sock_no_bind, sock_no_connect, sock_no_socketpair, sock_no_accept, sock_no_getname, sock_no_ioctl, sock_no_listen, sock_no_shutdown, sock_no_setsockopt, sock_no_getsockopt, sock_no_sendmsg, sock_no_recvmsg
27 bytes: xor_128, xor_128

Found 217 instances of 73 duplicated functions
Possible savings: 4034 bytes (of 1460830 total)


Attachments:
(No filename) (8.00 kB)
dupfunc.rb (3.21 kB)
Download all attachments

2007-01-05 15:41:17

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

On Thu, 2007-01-04 at 23:52 +0000, Al Viro wrote:
> On Thu, Jan 04, 2007 at 03:21:06PM -0800, Mitchell Blank Jr wrote:
> > Linus Torvalds wrote:
> > > Well, that probably would work, but it's also true that returning a 64-bit
> > > value on a 32-bit platform really _does_ depend on more than the size.
> >
> > Yeah, obviously this is restricted to the signed-integer case. My point
> > was just that you could have the compiler figure out which variant to pick
> > for loff_t automatically.
> >
> > > "let's not play tricks with function types at all".
> >
> > I think I agree. The real (but harder) fix for the wasted space issue
> > would be to get the toolchain to automatically combine functions that
> > end up compiling into identical assembly.
>
> Can't do.
>

you could if it's static and never has it's address taken (but that's
not the case here)


> You _can_ compile g into jump to f, but that's it. And that, AFAICS,
> is what gcc does.

I thought we actually disabled that in the kernel (it makes oopses
harder to read)....



--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org

2007-01-07 02:15:54

by Roman Zippel

[permalink] [raw]
Subject: Re: [UPDATED PATCH] fix memory corruption from misinterpreted bad_inode_ops return values

Hi,

On Thu, 4 Jan 2007, Al Viro wrote:

> PS: what would be the sane strategy for timer series merge, BTW?

PPS: I still don't like it. It fixes a rather theoretical problem with
absolutely no practical relevance.
PPPS: type safety is also possible with container_of(), the prototype
patch below demonstrates how to check that the signature matches and
additionally it doesn't require to convert everything at once. More
sophisticated checks can be done by putting this information into a
separate section, from where it can be extracted at compile/run time.

bye, Roman

---
include/linux/timer.h | 24 ++++++++++++++++++++++++
kernel/timer.c | 18 +++++++++++++++++-
kernel/workqueue.c | 24 ++++++++++--------------
3 files changed, 51 insertions(+), 15 deletions(-)

Index: linux-2.6-git/include/linux/timer.h
===================================================================
--- linux-2.6-git.orig/include/linux/timer.h 2007-01-06 20:45:02.000000000 +0100
+++ linux-2.6-git/include/linux/timer.h 2007-01-06 21:00:07.000000000 +0100
@@ -13,16 +13,40 @@ struct timer_list {

void (*function)(unsigned long);
unsigned long data;
+ unsigned long _sig;

struct tvec_t_base_s *base;
};

+#define __timer_sig(type, member) ((offsetof(type, member) << 16) | sizeof(type))
+
+#define __TIMER_OLD_SIG (0xa5005a)
+
+typedef void timer_cb_t(struct timer_list *);
+
+extern void __timer_init(struct timer_list *t, timer_cb_t *func, long sig);
+
+#define timer_init(ptr, member, func) \
+ __timer_init(&(ptr)->member, func, \
+ __timer_sig(typeof(*ptr), member))
+
+#define timer_of(ptr, type, member) ({ \
+ const struct timer_list *_t = (ptr); \
+ if (_t->_sig != __timer_sig(type, member)) { \
+ WARN_ON(_t->_sig != -__timer_sig(type, member));\
+ return; \
+ } \
+ container_of(ptr, type, member); \
+})
+
+
extern struct tvec_t_base_s boot_tvec_bases;

#define TIMER_INITIALIZER(_function, _expires, _data) { \
.function = (_function), \
.expires = (_expires), \
.data = (_data), \
+ ._sig = __TIMER_OLD_SIG, \
.base = &boot_tvec_bases, \
}

Index: linux-2.6-git/kernel/timer.c
===================================================================
--- linux-2.6-git.orig/kernel/timer.c 2007-01-06 20:45:02.000000000 +0100
+++ linux-2.6-git/kernel/timer.c 2007-01-06 21:00:07.000000000 +0100
@@ -226,6 +226,11 @@ static void internal_add_timer(tvec_base
unsigned long idx = expires - base->timer_jiffies;
struct list_head *vec;

+ if (!timer->_sig) {
+ WARN_ON(1);
+ timer->_sig = __TIMER_OLD_SIG;
+ }
+
if (idx < TVR_SIZE) {
int i = expires & TVR_MASK;
vec = base->tv1.vec + i;
@@ -271,11 +276,22 @@ static void internal_add_timer(tvec_base
*/
void fastcall init_timer(struct timer_list *timer)
{
+ timer->_sig = __TIMER_OLD_SIG;
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
}
EXPORT_SYMBOL(init_timer);

+extern void __timer_init(struct timer_list *t, timer_cb_t *func, long sig)
+{
+ t->function = (void *)func;
+ t->_sig = -sig;
+ func(t);
+ t->_sig = sig;
+ t->entry.next = NULL;
+ t->base = __raw_get_cpu_var(tvec_bases);
+}
+
static inline void detach_timer(struct timer_list *timer,
int clear_pending)
{
@@ -567,7 +583,7 @@ static inline void __run_timers(tvec_bas

timer = list_entry(head->next,struct timer_list,entry);
fn = timer->function;
- data = timer->data;
+ data = timer->_sig == __TIMER_OLD_SIG ? timer->data : (unsigned long)timer;

set_running_timer(base, timer);
detach_timer(timer, 1);
Index: linux-2.6-git/kernel/workqueue.c
===================================================================
--- linux-2.6-git.orig/kernel/workqueue.c 2007-01-06 19:51:40.000000000 +0100
+++ linux-2.6-git/kernel/workqueue.c 2007-01-06 21:02:59.000000000 +0100
@@ -218,9 +218,9 @@ int fastcall queue_work(struct workqueue
}
EXPORT_SYMBOL_GPL(queue_work);

-static void delayed_work_timer_fn(unsigned long __data)
+static void delayed_work_timer_fn(struct timer_list *timer)
{
- struct delayed_work *dwork = (struct delayed_work *)__data;
+ struct delayed_work *dwork = timer_of(timer, struct delayed_work, timer);
struct workqueue_struct *wq = get_wq_data(&dwork->work);
int cpu = smp_processor_id();

@@ -242,22 +242,20 @@ int fastcall queue_delayed_work(struct w
struct delayed_work *dwork, unsigned long delay)
{
int ret = 0;
- struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;

if (delay == 0)
return queue_work(wq, work);

if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
- BUG_ON(timer_pending(timer));
+ BUG_ON(timer_pending(&dwork->timer));
BUG_ON(!list_empty(&work->entry));

/* This stores wq for the moment, for the timer_fn */
set_wq_data(work, wq);
- timer->expires = jiffies + delay;
- timer->data = (unsigned long)dwork;
- timer->function = delayed_work_timer_fn;
- add_timer(timer);
+ timer_init(dwork, timer, delayed_work_timer_fn);
+ dwork->timer.expires = jiffies + delay;
+ add_timer(&dwork->timer);
ret = 1;
}
return ret;
@@ -277,19 +275,17 @@ int queue_delayed_work_on(int cpu, struc
struct delayed_work *dwork, unsigned long delay)
{
int ret = 0;
- struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;

if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
- BUG_ON(timer_pending(timer));
+ BUG_ON(timer_pending(&dwork->timer));
BUG_ON(!list_empty(&work->entry));

/* This stores wq for the moment, for the timer_fn */
set_wq_data(work, wq);
- timer->expires = jiffies + delay;
- timer->data = (unsigned long)dwork;
- timer->function = delayed_work_timer_fn;
- add_timer_on(timer, cpu);
+ timer_init(dwork, timer, delayed_work_timer_fn);
+ dwork->timer.expires = jiffies + delay;
+ add_timer_on(&dwork->timer, cpu);
ret = 1;
}
return ret;