From: Pekka Enberg <[email protected]>
The revokeat(2) and frevoke(2) system calls invalidate open file
descriptors and shared mappings of an inode. After a successful
revocation, operations on file descriptors fail with the EBADF or
ENXIO error code for regular and device files,
respectively. Attempting to read from or write to a revoked mapping
causes SIGBUS.
The actual operation is done in two passes:
1. Revoke all file descriptors that point to the given inode. We do
this under tasklist_lock so that after this pass, we don't need
to worry about racing with close(2) or dup(2).
2. Take down shared memory mappings of each revoked file and close
the file pointer.
The file descriptors are kept until the owning task does close(2) and
memory mapping ranges are preserved until the owning task does munmap(2).
Signed-off-by: Pekka Enberg <[email protected]>
---
arch/i386/kernel/syscall_table.S | 3
fs/Makefile | 2
fs/ext2/file.c | 1
fs/ext3/file.c | 1
fs/file_table.c | 1
fs/revoke.c | 588 ++++++++++++++++++++++++++++++++++
fs/revoked_inode.c | 664 +++++++++++++++++++++++++++++++++++++++
include/asm-i386/unistd.h | 4
include/linux/file.h | 14
include/linux/fs.h | 6
include/linux/mm.h | 2
include/linux/syscalls.h | 3
mm/memory.c | 3
mm/mmap.c | 11
14 files changed, 1298 insertions(+), 5 deletions(-)
Index: 2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- 2.6.orig/arch/i386/kernel/syscall_table.S
+++ 2.6/arch/i386/kernel/syscall_table.S
@@ -319,3 +319,6 @@ ENTRY(sys_call_table)
.long sys_move_pages
.long sys_getcpu
.long sys_epoll_pwait
+ .long sys_revokeat /* 320 */
+ .long sys_frevoke
+
Index: 2.6/fs/Makefile
===================================================================
--- 2.6.orig/fs/Makefile
+++ 2.6/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.
attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o
+ stack.o revoke.o revoked_inode.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
Index: 2.6/fs/revoke.c
===================================================================
--- /dev/null
+++ 2.6/fs/revoke.c
@@ -0,0 +1,588 @@
+/*
+ * fs/revoke.c - Invalidate all current open file descriptors of an inode.
+ *
+ * Copyright (C) 2006-2007 Pekka Enberg
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/sched.h>
+
+/*
+ * We pre-allocate an array of file pointers (including dummy inodes)
+ * so that we do not need to do kmalloc() under tasklist_lock. The
+ * revoke operation is done in two passes: first we revoke all fds
+ * pointing to an inode and then we do close/munmap in a second pass.
+ */
+struct revoke_table {
+ struct file **files; /* pre-allocated placeholder files, see get_revoked_file() */
+ unsigned long nr_files; /* capacity of files[] */
+ unsigned long nr_revoked; /* slots handed out by revoke_table_get() (first pass) */
+ unsigned long nr_closed; /* slots close_files() has attempted (second pass) */
+};
+
+struct kmem_cache *revokefs_inode_cache;
+
+/*
+ * Revoked file descriptors point to inodes in the revokefs filesystem.
+ */
+static struct vfsmount *revokefs_mnt;
+
+/*
+ * Per-inode bookkeeping for a placeholder file. The first three fields are
+ * filled in by revoke_fds() when the placeholder is installed.
+ */
+struct revokefs_inode_info {
+ struct task_struct *owner; /* task whose fd was replaced; NULL once closed/restored */
+ struct file *file; /* the original file that was revoked */
+ unsigned int fd; /* descriptor number in the owner's fd table */
+ struct inode vfs_inode; /* embedded VFS inode, see REVOKEFS_I() */
+};
+
+/* Map a revokefs VFS inode back to the revokefs_inode_info embedding it. */
+static inline struct revokefs_inode_info *REVOKEFS_I(struct inode *inode)
+{
+	struct revokefs_inode_info *info;
+
+	info = container_of(inode, struct revokefs_inode_info, vfs_inode);
+	return info;
+}
+
+extern void make_revoked_inode(struct inode *, int);
+
+/*
+ * Allocate a placeholder struct file backed by a fresh revokefs inode and
+ * dentry.
+ *
+ * Returns the new file with one reference held, or NULL if any of the
+ * allocations fails.
+ */
+static struct file *get_revoked_file(void)
+{
+	/* Zero-initialise so that d_alloc() never copies a garbage name.hash. */
+	struct qstr name = { .name = "revoked_file" };
+	struct dentry *dentry;
+	struct inode *inode;
+	struct file *filp;
+
+	filp = get_empty_filp();
+	if (!filp)
+		goto err;
+
+	inode = new_inode(revokefs_mnt->mnt_sb);
+	if (!inode)
+		goto err_inode;
+
+	name.len = strlen(name.name);
+	dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name);
+	if (!dentry)
+		goto err_dentry;
+
+	d_instantiate(dentry, inode);
+
+	filp->f_mapping = inode->i_mapping;
+	/*
+	 * d_alloc() already returned the dentry with a reference held. Hand
+	 * that reference over to the file; taking a second one with dget()
+	 * would leave a reference nobody ever drops and leak the dentry and
+	 * its inode.
+	 */
+	filp->f_dentry = dentry;
+	filp->f_vfsmnt = mntget(revokefs_mnt);
+	filp->f_op = fops_get(inode->i_fop);
+	filp->f_pos = 0;
+
+	return filp;
+
+ err_dentry:
+	iput(inode);
+ err_inode:
+	/*
+	 * The file has no dentry or vfsmount attached yet, so it must be
+	 * released with put_filp(); fput() would go through the full release
+	 * path and dereference the missing dentry.
+	 */
+	put_filp(filp);
+ err:
+	return NULL;
+}
+
+/*
+ * Non-zero when @file is a real file, is not the one the caller asked to
+ * skip, and refers to @inode.
+ */
+static inline int inode_matches(struct file *file, struct inode *inode,
+				struct file *to_exclude)
+{
+	if (!file || file == to_exclude)
+		return 0;
+
+	return file->f_dentry->d_inode == inode;
+}
+
+/* True once every pre-allocated placeholder has been handed out. */
+static inline bool revoke_table_is_full(struct revoke_table *table)
+{
+	unsigned long remaining = table->nr_files - table->nr_revoked;
+
+	return remaining == 0;
+}
+
+/*
+ * Hand out the next unused placeholder file and advance nr_revoked. The
+ * caller is expected to have checked revoke_table_is_full() first.
+ */
+static inline struct file *revoke_table_get(struct revoke_table *table)
+{
+	unsigned long slot = table->nr_revoked;
+
+	table->nr_revoked = slot + 1;
+	return table->files[slot];
+}
+
+/*
+ * First pass for one task: replace every descriptor of @owner that refers to
+ * @inode (other than @to_exclude) with a placeholder file taken from @table,
+ * and record the original file, fd number and owner in the placeholder's
+ * revokefs inode so the second pass (or a restore) can find them.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if a matching file has no ->revoke
+ * operation, or -ENOMEM if @table runs out of placeholders. On error,
+ * descriptors already replaced are left replaced; do_revoke() undoes them
+ * with restore_files().
+ *
+ * LOCKING: called by do_revoke() with read_lock(&tasklist_lock) held;
+ * files->file_lock is taken here while the fd table is walked.
+ */
+static int revoke_fds(struct task_struct *owner,
+ struct inode *inode,
+ struct file *to_exclude,
+ struct revoke_table *table)
+{
+ struct files_struct *files;
+ struct fdtable *fdt;
+ unsigned int fd;
+ int err = 0;
+
+ /* A task without a files_struct has nothing to revoke. */
+ files = get_files_struct(owner);
+ if (!files)
+ goto out;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+
+ for (fd = 0; fd < fdt->max_fds; fd++) {
+ struct revokefs_inode_info *info;
+ struct file *filp, *new_filp;
+ struct inode *new_inode;
+
+ filp = fcheck_files(files, fd);
+ if (!inode_matches(filp, inode, to_exclude))
+ continue;
+
+ if (!filp->f_op->revoke) {
+ err = -EOPNOTSUPP;
+ goto failed;
+ }
+
+ if (revoke_table_is_full(table)) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ /* Extra reference for the fd table; the table keeps its own. */
+ new_filp = revoke_table_get(table);
+ get_file(new_filp);
+
+ /*
+ * Replace original struct file pointer with a pointer to
+ * a 'revoked file.' After this point, we don't need to worry
+ * about racing with sys_close or sys_dup.
+ */
+ rcu_assign_pointer(fdt->fd[fd], new_filp);
+
+ /*
+ * Hold on to task until we can take down the file and its
+ * mmap.
+ */
+ get_task_struct(owner);
+
+ /*
+ * NOTE(review): new_filp->f_op was copied from the inode's i_fop
+ * in get_revoked_file(), before make_revoked_inode() runs here;
+ * confirm the placeholder file ends up with the revoked ops.
+ */
+ new_inode = new_filp->f_dentry->d_inode;
+ make_revoked_inode(new_inode, inode->i_mode & S_IFMT);
+
+ info = REVOKEFS_I(new_inode);
+ info->fd = fd;
+ info->file = filp;
+ info->owner = owner;
+ }
+ failed:
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ out:
+ return err;
+}
+
+/*
+ * Take down the shared mappings of a revoked file in its owner's address
+ * space: every VM_SHARED vma backed by revoked->file is flagged VM_REVOKED
+ * and has its pages zapped. The vma itself stays in place.
+ *
+ * Always returns 0.
+ */
+static int revoke_mmap(struct revokefs_inode_info *revoked)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+
+	/*
+	 * get_task_mm() returns NULL when the task has no user address space
+	 * to hand out; there is then nothing mapped that needs revoking.
+	 */
+	mm = get_task_mm(revoked->owner);
+	if (!mm)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+		if (vma->vm_file != revoked->file)
+			continue;
+
+		vma->vm_flags |= VM_REVOKED;
+		zap_page_range(vma, vma->vm_start,
+			       vma->vm_end - vma->vm_start, NULL);
+	}
+	up_write(&mm->mmap_sem);
+
+	mmput(mm);
+	return 0;
+}
+
+/*
+ * Close the original (revoked) file on behalf of its owner.
+ *
+ * Returns the result of filp_close(), or 0 when the owner no longer has a
+ * files_struct.
+ */
+static int revoke_filp(struct revokefs_inode_info *info)
+{
+	struct files_struct *files = get_files_struct(info->owner);
+	int err;
+
+	if (!files)
+		return 0;
+
+	/* Spin until f_light is clear, i.e. no fget_light() user is left. */
+	while (info->file->f_light)
+		schedule();
+
+	err = filp_close(info->file, files);
+	put_files_struct(files);
+	return err;
+}
+
+/*
+ * Undo revoke_fds() for one descriptor: put the original file back into the
+ * owner's fd table slot and drop the task reference taken in revoke_fds().
+ * If the owner no longer has a files_struct only the task reference is
+ * dropped.
+ */
+static void restore_file(struct revokefs_inode_info *info)
+{
+ struct files_struct *files;
+
+ files = get_files_struct(info->owner);
+ if (files) {
+ struct fdtable *fdt;
+ struct file *filp;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+
+ /*
+ * Drop the reference of whatever occupies the slot now
+ * (presumably the placeholder installed by revoke_fds()).
+ */
+ filp = fdt->fd[info->fd];
+ if (filp)
+ fput(filp);
+
+ rcu_assign_pointer(fdt->fd[info->fd], info->file);
+ /* NOTE(review): marks the restored fd close-on-exec; confirm this is intended. */
+ FD_SET(info->fd, fdt->close_on_exec);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+ put_task_struct(info->owner);
+ info->owner = NULL; /* To avoid double-restore. */
+}
+
+/*
+ * Put back the original files for every descriptor that was revoked in the
+ * first pass but has not yet been handed to close_file().
+ *
+ * Only slots [nr_closed, nr_revoked) carry a live owner/file/fd record.
+ * Slots at or beyond nr_revoked were never handed out by revoke_table_get(),
+ * so walking up to nr_files would trip the BUG_ON() below on their NULL
+ * owner.
+ */
+static void restore_files(struct revoke_table *table)
+{
+	unsigned long i;
+
+	for (i = table->nr_closed; i < table->nr_revoked; i++) {
+		struct revokefs_inode_info *info;
+		struct file *filp;
+
+		filp = table->files[i];
+		info = REVOKEFS_I(filp->f_dentry->d_inode);
+		BUG_ON(!info->owner);
+
+		restore_file(info);
+	}
+}
+
+/*
+ * Second-pass work for one revoked descriptor: take down its shared
+ * mappings, call the file's ->revoke operation, then close the original
+ * file. Stops at, and returns, the first error.
+ */
+static int close_file(struct revokefs_inode_info *info)
+{
+	struct file *file;
+	int err;
+
+	err = revoke_mmap(info);
+	if (err)
+		return err;
+
+	file = info->file;
+	err = file->f_op->revoke(file);
+	if (err)
+		return err;
+
+	return revoke_filp(info);
+}
+
+/*
+ * Second pass: run close_file() on every descriptor revoked in the first
+ * pass. On the first failure the descriptors not yet attempted are handed
+ * to restore_files() and the error is returned; otherwise returns 0.
+ */
+static int close_files(struct revoke_table *table)
+{
+ unsigned long i;
+ int err = 0;
+
+ for (i = 0; i < table->nr_revoked; i++) {
+ struct revokefs_inode_info *info;
+ struct file *this;
+
+ this = table->files[i];
+ info = REVOKEFS_I(this->f_dentry->d_inode);
+ BUG_ON(!info->owner);
+
+ /*
+ * Increase count before attempting to close file as
+ * a partially closed file can no longer be restored.
+ */
+ table->nr_closed++;
+ err = close_file(info);
+ /* Drops the task reference taken in revoke_fds(). */
+ put_task_struct(info->owner);
+ info->owner = NULL; /* To avoid restoring closed file. */
+ if (err)
+ goto failed;
+ }
+ return 0;
+
+ failed:
+ restore_files(table);
+ return err;
+}
+
+/*
+ * Returns an upper bound on the number of fds pointing to inode: for every
+ * thread that has at least one matching descriptor (other than to_exclude),
+ * the full size of its fd table (max_fds) is added to the total.
+ *
+ * LOCKING: read_lock(&tasklist_lock)
+ */
+static unsigned long inode_fds(struct inode *inode, struct file *to_exclude)
+{
+ struct task_struct *g, *p;
+ unsigned long nr_fds = 0;
+
+ do_each_thread(g, p) {
+ struct files_struct *files;
+ struct fdtable *fdt;
+ unsigned int fd;
+
+ files = get_files_struct(p);
+ if (!files)
+ continue;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ for (fd = 0; fd < fdt->max_fds; fd++) {
+ struct file *file;
+
+ file = fcheck_files(files, fd);
+ if (inode_matches(file, inode, to_exclude)) {
+ /* One match is enough: reserve room for the whole table. */
+ nr_fds += fdt->max_fds;
+ break;
+ }
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ } while_each_thread(g, p);
+ return nr_fds;
+}
+
+/*
+ * Drop the table's reference on every placeholder file and free the table.
+ *
+ * Accepts a NULL table and skips empty slots, so it is safe to call on error
+ * paths where the table was never allocated or only partially filled.
+ */
+static void free_revoke_table(struct revoke_table *table)
+{
+	unsigned long i;
+
+	if (!table)
+		return;
+
+	for (i = 0; i < table->nr_files; i++) {
+		/* Slots that were never filled are still NULL from kcalloc(). */
+		if (table->files[i])
+			fput(table->files[i]);
+	}
+
+	kfree(table->files);
+	kfree(table);
+}
+
+/*
+ * Allocate a revoke table with room for @nr_fds descriptors and fill every
+ * slot with a placeholder file from get_revoked_file().
+ *
+ * Returns the table, or NULL if any allocation fails (nothing is leaked).
+ */
+static struct revoke_table *__alloc_revoke_table(unsigned long nr_fds)
+{
+	struct revoke_table *table;
+	unsigned long i;
+
+	table = kzalloc(sizeof *table, GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	table->nr_files = nr_fds;
+	table->files = kcalloc(nr_fds, sizeof(struct file *), GFP_KERNEL);
+	if (!table->files) {
+		kfree(table);
+		return NULL;
+	}
+
+	for (i = 0; i < table->nr_files; i++) {
+		struct file *filp;
+
+		filp = get_revoked_file();
+		if (!filp)
+			goto err;
+
+		table->files[i] = filp;
+	}
+	return table;
+
+ err:
+	/*
+	 * Only the first i slots hold a file. Shrink the table to that size
+	 * so the cleanup does not fput() the empty slots behind them.
+	 */
+	table->nr_files = i;
+	free_revoke_table(table);
+	return NULL;
+}
+
+/*
+ * Size a revoke table for @inode: take the inode_fds() estimate under
+ * tasklist_lock, then allocate with the lock dropped. Returns NULL on
+ * allocation failure.
+ */
+static struct revoke_table *alloc_revoke_table(struct inode *inode,
+					       struct file *to_exclude)
+{
+	unsigned long wanted;
+
+	read_lock(&tasklist_lock);
+	wanted = inode_fds(inode, to_exclude);
+	read_unlock(&tasklist_lock);
+
+	return __alloc_revoke_table(wanted);
+}
+
+/*
+ * Revoke every open descriptor and shared mapping of @inode, except the
+ * file @to_exclude (may be NULL).
+ *
+ * Returns 0 on success, -EPERM if the caller neither owns the inode nor has
+ * CAP_FOWNER, -ERESTARTSYS if a signal is pending, -ENOMEM if the revoke
+ * table cannot be allocated, or the error from revoke_fds()/close_files().
+ *
+ * The early error paths return directly: no table exists at that point, and
+ * after a retry the previous table has already been freed, so funnelling
+ * them through a common "free the table" exit would free NULL or free the
+ * same table twice.
+ */
+static int do_revoke(struct inode *inode, struct file *to_exclude)
+{
+	struct revoke_table *table;
+	struct task_struct *g, *p;
+	int err = 0;
+
+	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+		return -EPERM;
+
+ retry:
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	table = alloc_revoke_table(inode, to_exclude);
+	if (!table)
+		return -ENOMEM;
+
+	read_lock(&tasklist_lock);
+
+	/*
+	 * If someone forked while we were allocating memory, try again.
+	 */
+	if (inode_fds(inode, to_exclude) > table->nr_files) {
+		read_unlock(&tasklist_lock);
+		free_revoke_table(table);
+		goto retry;
+	}
+
+	/*
+	 * First revoke the fds. After we are done, no one can start new
+	 * operations on them.
+	 */
+	do_each_thread(g, p) {
+		err = revoke_fds(p, inode, to_exclude, table);
+		if (err)
+			goto exit_loop;
+	} while_each_thread(g, p);
+ exit_loop:
+	read_unlock(&tasklist_lock);
+
+	if (err) {
+		/* Put back whatever the first pass managed to replace. */
+		restore_files(table);
+	} else {
+		/*
+		 * Now, take down the mmaps and close the files for good.
+		 */
+		err = close_files(table);
+	}
+
+	free_revoke_table(table);
+	return err;
+}
+
+/*
+ * revokeat(2): look up @filename relative to @dfd and revoke every open
+ * descriptor of the resulting inode. Returns the lookup error or the
+ * result of do_revoke().
+ */
+asmlinkage int sys_revokeat(int dfd, const char __user *filename)
+{
+	struct nameidata nd;
+	int err;
+
+	err = __user_walk_fd(dfd, filename, 0, &nd);
+	if (err)
+		return err;
+
+	err = do_revoke(nd.dentry->d_inode, NULL);
+	path_release(&nd);
+	return err;
+}
+
+/*
+ * frevoke(2): revoke every other open descriptor of the inode behind @fd.
+ * The file looked up from @fd itself is excluded. Returns -EBADF if @fd is
+ * not an open descriptor, otherwise the result of do_revoke().
+ */
+asmlinkage int sys_frevoke(unsigned int fd)
+{
+	struct file *file;
+	int err;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+
+	err = do_revoke(file->f_dentry->d_inode, file);
+	fput(file);
+	return err;
+}
+
+/*
+ * Generic ->revoke implementation: flush pending writes with do_fsync(),
+ * then invalidate the page cache of the file's mapping so that pending
+ * reads fail. Returns 0 or the first error encountered.
+ */
+int generic_file_revoke(struct file *file)
+{
+	int err;
+
+	err = do_fsync(file, 1);
+	if (err)
+		return err;
+
+	return invalidate_inode_pages2(file->f_mapping);
+}
+EXPORT_SYMBOL(generic_file_revoke);
+
+/*
+ * Filesystem for revoked files.
+ */
+
+/*
+ * ->alloc_inode: allocate a revokefs_inode_info from the slab cache and
+ * return its embedded VFS inode, or NULL on allocation failure.
+ */
+static struct inode *revokefs_alloc_inode(struct super_block *sb)
+{
+	struct revokefs_inode_info *info =
+		kmem_cache_alloc(revokefs_inode_cache, GFP_NOFS);
+
+	return info ? &info->vfs_inode : NULL;
+}
+
+/* ->destroy_inode: give the containing revokefs_inode_info back to the cache. */
+static void revokefs_destroy_inode(struct inode *inode)
+{
+	struct revokefs_inode_info *info = REVOKEFS_I(inode);
+
+	kmem_cache_free(revokefs_inode_cache, info);
+}
+
+#define REVOKEFS_MAGIC 0x5245564B /* REVK */
+
+/*
+ * Superblock operations for revokefs: inodes come from and return to
+ * revokefs_inode_cache, and drop_inode is generic_delete_inode().
+ */
+static struct super_operations revokefs_super_ops = {
+ .alloc_inode = revokefs_alloc_inode,
+ .destroy_inode = revokefs_destroy_inode,
+ .drop_inode = generic_delete_inode,
+};
+
+/* ->get_sb: set revokefs up as a pseudo filesystem via get_sb_pseudo(). */
+static int revokefs_get_sb(struct file_system_type *fs_type,
+			   int flags, const char *dev_name, void *data,
+			   struct vfsmount *mnt)
+{
+	int ret;
+
+	ret = get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops,
+			    REVOKEFS_MAGIC, mnt);
+	return ret;
+}
+
+/* Filesystem type registered and kern_mount()ed by revokefs_init(). */
+struct file_system_type revokefs_fs_type = {
+ .name = "revokefs",
+ .get_sb = revokefs_get_sb,
+ .kill_sb = kill_anon_super
+};
+
+/*
+ * Slab constructor for revokefs_inode_cache: on a real construction call
+ * (constructor flag set, verify flag clear) clear the owner and run
+ * inode_init_once() on the embedded inode.
+ */
+static void init_once(void *obj, struct kmem_cache *cache, unsigned long flags)
+{
+	struct revokefs_inode_info *info = obj;
+	unsigned long ctor_bits;
+
+	ctor_bits = flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR);
+	if (ctor_bits != SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	info->owner = NULL;
+	inode_init_once(&info->vfs_inode);
+}
+
+/*
+ * Create the inode cache, register revokefs and mount it internally.
+ * Returns 0 on success; on failure everything set up so far is undone and
+ * a negative error code is returned.
+ */
+static int __init revokefs_init(void)
+{
+	int err;
+
+	revokefs_inode_cache =
+		kmem_cache_create("revokefs_inode_cache",
+				  sizeof(struct revokefs_inode_info),
+				  0,
+				  (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+				  init_once, NULL);
+	if (!revokefs_inode_cache)
+		return -ENOMEM;
+
+	err = register_filesystem(&revokefs_fs_type);
+	if (err)
+		goto err_register;
+
+	revokefs_mnt = kern_mount(&revokefs_fs_type);
+	if (!IS_ERR(revokefs_mnt))
+		return 0;
+
+	err = PTR_ERR(revokefs_mnt);
+	unregister_filesystem(&revokefs_fs_type);
+ err_register:
+	kmem_cache_destroy(revokefs_inode_cache);
+	return err;
+}
+late_initcall(revokefs_init);
Index: 2.6/include/asm-i386/unistd.h
===================================================================
--- 2.6.orig/include/asm-i386/unistd.h
+++ 2.6/include/asm-i386/unistd.h
@@ -325,10 +325,12 @@
#define __NR_move_pages 317
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
+#define __NR_revokeat 320
+#define __NR_frevoke 321
#ifdef __KERNEL__
-#define NR_syscalls 320
+#define NR_syscalls 322
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Index: 2.6/include/linux/syscalls.h
===================================================================
--- 2.6.orig/include/linux/syscalls.h
+++ 2.6/include/linux/syscalls.h
@@ -605,4 +605,7 @@ asmlinkage long sys_getcpu(unsigned __us
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+asmlinkage int sys_revokeat(int dfd, const char __user *filename);
+asmlinkage int sys_frevoke(unsigned int fd);
+
#endif
Index: 2.6/fs/file_table.c
===================================================================
--- 2.6.orig/fs/file_table.c
+++ 2.6/fs/file_table.c
@@ -219,6 +219,7 @@ struct file fastcall *fget_light(unsigne
*fput_needed = 0;
if (likely((atomic_read(&files->count) == 1))) {
file = fcheck_files(files, fd);
+ set_f_light(file);
} else {
rcu_read_lock();
file = fcheck_files(files, fd);
Index: 2.6/include/linux/file.h
===================================================================
--- 2.6.orig/include/linux/file.h
+++ 2.6/include/linux/file.h
@@ -6,6 +6,7 @@
#define __LINUX_FILE_H
#include <asm/atomic.h>
+#include <linux/fs.h>
#include <linux/posix_types.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
@@ -62,10 +63,23 @@ extern struct kmem_cache *filp_cachep;
extern void FASTCALL(__fput(struct file *));
extern void FASTCALL(fput(struct file *));
+/*
+ * Clear the "used without a reference" marker on @file.
+ *
+ * NULL is tolerated, matching set_f_light(): fput_light() reaches this
+ * helper whenever no reference was taken, and before this marker existed
+ * that path never touched the file pointer at all.
+ */
+static inline void clear_f_light(struct file *file)
+{
+	if (file)
+		file->f_light = 0;
+}
+
+/* Mark @file as being used without a reference held; NULL is ignored. */
+static inline void set_f_light(struct file *file)
+{
+	if (!file)
+		return;
+
+	file->f_light = 1;
+}
+
static inline void fput_light(struct file *file, int fput_needed)
{
if (unlikely(fput_needed))
fput(file);
+ else
+ clear_f_light(file);
}
extern struct file * FASTCALL(fget(unsigned int fd));
Index: 2.6/include/linux/fs.h
===================================================================
--- 2.6.orig/include/linux/fs.h
+++ 2.6/include/linux/fs.h
@@ -737,6 +737,8 @@ struct file {
struct list_head f_ep_links;
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
+ /* This instance is being used without holding a reference. */
+ int f_light;
struct address_space *f_mapping;
};
extern spinlock_t files_lock;
@@ -1098,6 +1100,7 @@ struct file_operations {
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+ int (*revoke)(struct file *);
};
struct inode_operations {
@@ -1732,6 +1735,9 @@ extern ssize_t generic_splice_sendpage(s
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
size_t len, unsigned int flags);
+/* fs/revoke.c */
+extern int generic_file_revoke(struct file *);
+
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
Index: 2.6/fs/ext2/file.c
===================================================================
--- 2.6.orig/fs/ext2/file.c
+++ 2.6/fs/ext2/file.c
@@ -56,6 +56,7 @@ const struct file_operations ext2_file_o
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
+ .revoke = generic_file_revoke,
};
#ifdef CONFIG_EXT2_FS_XIP
Index: 2.6/include/linux/mm.h
===================================================================
--- 2.6.orig/include/linux/mm.h
+++ 2.6/include/linux/mm.h
@@ -169,6 +169,8 @@ extern unsigned int kobjsize(const void
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
+#define VM_REVOKED 0x04000000 /* Mapping has been revoked */
+
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
Index: 2.6/mm/memory.c
===================================================================
--- 2.6.orig/mm/memory.c
+++ 2.6/mm/memory.c
@@ -2460,6 +2460,9 @@ int __handle_mm_fault(struct mm_struct *
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
+ if (unlikely(vma->vm_flags & VM_REVOKED))
+ return VM_FAULT_SIGBUS;
+
pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address);
if (!pud)
Index: 2.6/mm/mmap.c
===================================================================
--- 2.6.orig/mm/mmap.c
+++ 2.6/mm/mmap.c
@@ -1028,6 +1028,8 @@ unsigned long do_mmap_pgoff(struct file
error = -ENOMEM;
munmap_back:
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	/* vma may be NULL here (the test just below checks for that). */
+	if (unlikely(vma && (vma->vm_flags & VM_REVOKED)))
+		return -ENODEV;
if (vma && vma->vm_start < addr + len) {
if (do_munmap(mm, addr, len))
return -ENOMEM;
@@ -1679,13 +1681,16 @@ static void unmap_region(struct mm_struc
{
struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
struct mmu_gather *tlb;
- unsigned long nr_accounted = 0;
lru_add_drain();
tlb = tlb_gather_mmu(mm, 0);
update_hiwater_rss(mm);
- unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
- vm_unacct_memory(nr_accounted);
+ if (!(vma->vm_flags & VM_REVOKED)) {
+ unsigned long nr_accounted = 0;
+
+ unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
+ vm_unacct_memory(nr_accounted);
+ }
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
Index: 2.6/fs/ext3/file.c
===================================================================
--- 2.6.orig/fs/ext3/file.c
+++ 2.6/fs/ext3/file.c
@@ -123,6 +123,7 @@ const struct file_operations ext3_file_o
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
+ .revoke = generic_file_revoke,
};
struct inode_operations ext3_file_inode_operations = {
Index: 2.6/fs/revoked_inode.c
===================================================================
--- /dev/null
+++ 2.6/fs/revoked_inode.c
@@ -0,0 +1,664 @@
+/*
+ * linux/fs/revoked_inode.c
+ *
+ * Copyright (C) 1997, Stephen Tweedie
+ *
+ * Provide stub functions for revoked inodes
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/stat.h>
+#include <linux/time.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+
+/*
+ * Stub file operations that fail with -EBADF (the error the patch
+ * description assigns to revoked regular files). Every stub below returns
+ * -EBADF, with two exceptions: poll reports POLLERR and flush returns 0.
+ */
+static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_read(struct file *filp, char __user * buf,
+ size_t size, loff_t * ppos)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_write(struct file *filp, const char __user * buf,
+ size_t siz, loff_t * ppos)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_aio_read(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_aio_write(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EBADF;
+}
+
+static int revoked_file_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ return -EBADF;
+}
+
+/* poll cannot return an errno; report the error condition as POLLERR. */
+static unsigned int revoked_file_poll(struct file *filp, poll_table * wait)
+{
+ return POLLERR;
+}
+
+static int revoked_file_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ return -EBADF;
+}
+
+static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+{
+ return -EBADF;
+}
+
+static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EBADF;
+}
+
+static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ return -EBADF;
+}
+
+static int revoked_file_open(struct inode *inode, struct file *filp)
+{
+ return -EBADF;
+}
+
+/* The one stub that succeeds. */
+static int revoked_file_flush(struct file *file, fl_owner_t id)
+{
+ return 0;
+}
+
+static int revoked_file_release(struct inode *inode, struct file *filp)
+{
+ return -EBADF;
+}
+
+static int revoked_file_fsync(struct file *file, struct dentry *dentry,
+ int datasync)
+{
+ return -EBADF;
+}
+
+static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync)
+{
+ return -EBADF;
+}
+
+static int revoked_file_fasync(int fd, struct file *filp, int on)
+{
+ return -EBADF;
+}
+
+static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos,
+ size_t count, read_actor_t actor,
+ void *target)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_sendpage(struct file *file, struct page *page,
+ int off, size_t len, loff_t * pos,
+ int more)
+{
+ return -EBADF;
+}
+
+static unsigned long revoked_file_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ return -EBADF;
+}
+
+static int revoked_file_check_flags(int flags)
+{
+ return -EBADF;
+}
+
+static int revoked_file_dir_notify(struct file *file, unsigned long arg)
+{
+ return -EBADF;
+}
+
+static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t * ppos,
+ size_t len, unsigned int flags)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ return -EBADF;
+}
+
+/* Operations table wiring up the -EBADF stubs above. */
+static const struct file_operations revoked_file_ops = {
+ .llseek = revoked_file_llseek,
+ .read = revoked_file_read,
+ .write = revoked_file_write,
+ .aio_read = revoked_file_aio_read,
+ .aio_write = revoked_file_aio_write,
+ .readdir = revoked_file_readdir,
+ .poll = revoked_file_poll,
+ .ioctl = revoked_file_ioctl,
+ .unlocked_ioctl = revoked_file_unlocked_ioctl,
+ .compat_ioctl = revoked_file_compat_ioctl,
+ .mmap = revoked_file_mmap,
+ .open = revoked_file_open,
+ .flush = revoked_file_flush,
+ .release = revoked_file_release,
+ .fsync = revoked_file_fsync,
+ .aio_fsync = revoked_file_aio_fsync,
+ .fasync = revoked_file_fasync,
+ .lock = revoked_file_lock,
+ .sendfile = revoked_file_sendfile,
+ .sendpage = revoked_file_sendpage,
+ .get_unmapped_area = revoked_file_get_unmapped_area,
+ .check_flags = revoked_file_check_flags,
+ .dir_notify = revoked_file_dir_notify,
+ .flock = revoked_file_flock,
+ .splice_write = revoked_file_splice_write,
+ .splice_read = revoked_file_splice_read,
+};
+
+/*
+ * Stub inode operations that fail with -EBADF. Every stub below returns
+ * -EBADF; lookup, which returns a pointer, returns ERR_PTR(-EBADF).
+ */
+static int revoked_inode_create(struct inode *dir, struct dentry *dentry,
+ int mode, struct nameidata *nd)
+{
+ return -EBADF;
+}
+
+static struct dentry *revoked_inode_lookup(struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ return ERR_PTR(-EBADF);
+}
+
+static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry,
+ int mode, dev_t rdev)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer,
+ int buflen)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
+{
+ return -EBADF;
+}
+
+static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer,
+ size_t buffer_size)
+{
+ return -EBADF;
+}
+
+static int revoked_inode_removexattr(struct dentry *dentry, const char *name)
+{
+ return -EBADF;
+}
+
+/*
+ * Operations table wiring up the -EBADF stubs above. Hooks that return void
+ * or must stay a no-op are deliberately left unset, as noted inline.
+ */
+static struct inode_operations revoked_inode_ops = {
+ .create = revoked_inode_create,
+ .lookup = revoked_inode_lookup,
+ .link = revoked_inode_link,
+ .unlink = revoked_inode_unlink,
+ .symlink = revoked_inode_symlink,
+ .mkdir = revoked_inode_mkdir,
+ .rmdir = revoked_inode_rmdir,
+ .mknod = revoked_inode_mknod,
+ .rename = revoked_inode_rename,
+ .readlink = revoked_inode_readlink,
+ /* follow_link must be no-op, otherwise unmounting this inode
+ won't work */
+ /* put_link returns void */
+ /* truncate returns void */
+ .permission = revoked_inode_permission,
+ .getattr = revoked_inode_getattr,
+ .setattr = revoked_inode_setattr,
+ .setxattr = revoked_inode_setxattr,
+ .getxattr = revoked_inode_getxattr,
+ .listxattr = revoked_inode_listxattr,
+ .removexattr = revoked_inode_removexattr,
+ /* truncate_range returns void */
+};
+
+/*
+ * Stub file operations that fail with -ENXIO (the error the patch
+ * description assigns to revoked device files). Every stub below returns
+ * -ENXIO, with two exceptions: poll reports POLLERR and flush returns 0.
+ */
+static loff_t revoked_special_file_llseek(struct file *file, loff_t offset,
+ int origin)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_read(struct file *filp, char __user * buf,
+ size_t size, loff_t * ppos)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_write(struct file *filp,
+ const char __user * buf, size_t siz,
+ loff_t * ppos)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_aio_read(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_aio_write(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ return -ENXIO;
+}
+
+/* poll cannot return an errno; report the error condition as POLLERR. */
+static unsigned int revoked_special_file_poll(struct file *filp,
+ poll_table * wait)
+{
+ return POLLERR;
+}
+
+static int revoked_special_file_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ return -ENXIO;
+}
+
+static long revoked_special_file_unlocked_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+{
+ return -ENXIO;
+}
+
+static long revoked_special_file_compat_ioctl(struct file *file,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_mmap(struct file *file,
+ struct vm_area_struct *vma)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_open(struct inode *inode, struct file *filp)
+{
+ return -ENXIO;
+}
+
+/* The one stub that succeeds. */
+static int revoked_special_file_flush(struct file *file, fl_owner_t id)
+{
+ return 0;
+}
+
+static int revoked_special_file_release(struct inode *inode, struct file *filp)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_fsync(struct file *file, struct dentry *dentry,
+ int datasync)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_aio_fsync(struct kiocb *iocb, int datasync)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_fasync(int fd, struct file *filp, int on)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_lock(struct file *file, int cmd,
+ struct file_lock *fl)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_sendfile(struct file *in_file,
+ loff_t * ppos, size_t count,
+ read_actor_t actor, void *target)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_sendpage(struct file *file,
+ struct page *page, int off,
+ size_t len, loff_t * pos, int more)
+{
+ return -ENXIO;
+}
+
+static unsigned long revoked_special_file_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_check_flags(int flags)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_dir_notify(struct file *file, unsigned long arg)
+{
+ return -ENXIO;
+}
+
+static int revoked_special_file_flock(struct file *filp, int cmd,
+ struct file_lock *fl)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out,
+ loff_t * ppos, size_t len,
+ unsigned int flags)
+{
+ return -ENXIO;
+}
+
+static ssize_t revoked_special_file_splice_read(struct file *in, loff_t * ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ return -ENXIO;
+}
+
+/* Operations table wiring up the -ENXIO stubs above. */
+static const struct file_operations revoked_special_file_ops = {
+ .llseek = revoked_special_file_llseek,
+ .read = revoked_special_file_read,
+ .write = revoked_special_file_write,
+ .aio_read = revoked_special_file_aio_read,
+ .aio_write = revoked_special_file_aio_write,
+ .readdir = revoked_special_file_readdir,
+ .poll = revoked_special_file_poll,
+ .ioctl = revoked_special_file_ioctl,
+ .unlocked_ioctl = revoked_special_file_unlocked_ioctl,
+ .compat_ioctl = revoked_special_file_compat_ioctl,
+ .mmap = revoked_special_file_mmap,
+ .open = revoked_special_file_open,
+ .flush = revoked_special_file_flush,
+ .release = revoked_special_file_release,
+ .fsync = revoked_special_file_fsync,
+ .aio_fsync = revoked_special_file_aio_fsync,
+ .fasync = revoked_special_file_fasync,
+ .lock = revoked_special_file_lock,
+ .sendfile = revoked_special_file_sendfile,
+ .sendpage = revoked_special_file_sendpage,
+ .get_unmapped_area = revoked_special_file_get_unmapped_area,
+ .check_flags = revoked_special_file_check_flags,
+ .dir_notify = revoked_special_file_dir_notify,
+ .flock = revoked_special_file_flock,
+ .splice_write = revoked_special_file_splice_write,
+ .splice_read = revoked_special_file_splice_read,
+};
+
+static int revoked_special_inode_create(struct inode *dir,
+ struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ return -ENXIO; /* stub: ->create on a revoked special inode always fails */
+}
+
+static struct dentry *revoked_special_inode_lookup(struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ return ERR_PTR(-ENXIO); /* stub: the errno is encoded in the returned pointer via ERR_PTR() */
+}
+
+static int revoked_special_inode_link(struct dentry *old_dentry,
+ struct inode *dir, struct dentry *dentry)
+{
+ return -ENXIO; /* stub: ->link on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_unlink(struct inode *dir,
+ struct dentry *dentry)
+{
+ return -ENXIO; /* stub: ->unlink on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_symlink(struct inode *dir,
+ struct dentry *dentry,
+ const char *symname)
+{
+ return -ENXIO; /* stub: ->symlink on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_mkdir(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ return -ENXIO; /* stub: ->mkdir on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ return -ENXIO; /* stub: ->rmdir on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_mknod(struct inode *dir, struct dentry *dentry,
+ int mode, dev_t rdev)
+{
+ return -ENXIO; /* stub: ->mknod fails for every mode and rdev */
+}
+
+static int revoked_special_inode_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry)
+{
+ return -ENXIO; /* stub: ->rename on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_readlink(struct dentry *dentry,
+ char __user * buffer, int buflen)
+{
+ return -ENXIO; /* stub: ->readlink fails without writing to the user buffer */
+}
+
+static int revoked_special_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ return -ENXIO; /* stub: ->permission returns -ENXIO for every access mask */
+}
+
+static int revoked_special_inode_getattr(struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct kstat *stat)
+{
+ return -ENXIO; /* stub: ->getattr fails and leaves *stat unfilled */
+}
+
+static int revoked_special_inode_setattr(struct dentry *direntry,
+ struct iattr *attrs)
+{
+ return -ENXIO; /* stub: ->setattr on a revoked special inode always fails */
+}
+
+static int revoked_special_inode_setxattr(struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return -ENXIO; /* stub: ->setxattr on a revoked special inode always fails */
+}
+
+static ssize_t revoked_special_inode_getxattr(struct dentry *dentry,
+ const char *name, void *buffer,
+ size_t size)
+{
+ return -ENXIO; /* stub: ->getxattr fails without writing to buffer */
+}
+
+static ssize_t revoked_special_inode_listxattr(struct dentry *dentry,
+ char *buffer, size_t buffer_size)
+{
+ return -ENXIO; /* stub: ->listxattr fails without writing to buffer */
+}
+
+static int revoked_special_inode_removexattr(struct dentry *dentry,
+ const char *name)
+{
+ return -ENXIO; /* stub: ->removexattr on a revoked special inode always fails */
+}
+
+static struct inode_operations revoked_special_inode_ops = { /* inode hooks make_revoked_inode() installs on special files */
+ .create = revoked_special_inode_create,
+ .lookup = revoked_special_inode_lookup,
+ .link = revoked_special_inode_link,
+ .unlink = revoked_special_inode_unlink,
+ .symlink = revoked_special_inode_symlink,
+ .mkdir = revoked_special_inode_mkdir,
+ .rmdir = revoked_special_inode_rmdir,
+ .mknod = revoked_special_inode_mknod,
+ .rename = revoked_special_inode_rename,
+ .readlink = revoked_special_inode_readlink,
+ /* follow_link is left unset: it must be a no-op, otherwise unmounting
+ this inode won't work */
+ /* put_link returns void: no stub installed */
+ /* truncate returns void: no stub installed */
+ .permission = revoked_special_inode_permission,
+ .getattr = revoked_special_inode_getattr,
+ .setattr = revoked_special_inode_setattr,
+ .setxattr = revoked_special_inode_setxattr,
+ .getxattr = revoked_special_inode_getxattr,
+ .listxattr = revoked_special_inode_listxattr,
+ .removexattr = revoked_special_inode_removexattr,
+ /* truncate_range returns void: no stub installed */
+};
+
+void make_revoked_inode(struct inode *inode, int mode) /* rewire inode to the revoked stub operations; mode becomes its new i_mode */
+{
+ remove_inode_hash(inode); /* NOTE(review): presumably so later lookups no longer find this inode -- confirm */
+
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime =
+ current_fs_time(inode->i_sb); /* all three timestamps get the same current_fs_time() value */
+
+ if (special_file(mode)) { /* special files: the -ENXIO stub tables */
+ inode->i_op = &revoked_special_inode_ops;
+ inode->i_fop = &revoked_special_file_ops;
+ } else { /* every other file type: the -EBADF stub tables */
+ inode->i_op = &revoked_inode_ops;
+ inode->i_fop = &revoked_file_ops;
+ }
+}
Quoting Pekka J Enberg ([email protected]):
> From: Pekka Enberg <[email protected]>
>
> The revokeat(2) and frevoke(2) system calls invalidate open file
> descriptors and shared mappings of an inode. After an successful
> revocation, operations on file descriptors fail with the EBADF or
> ENXIO error code for regular and device files,
> respectively. Attempting to read from or write to a revoked mapping
> causes SIGBUS.
>
> The actual operation is done in two passes:
>
> 1. Revoke all file descriptors that point to the given inode. We do
> this under tasklist_lock so that after this pass, we don't need
> to worry about racing with close(2) or dup(2).
>
> 2. Take down shared memory mappings of each revoked file and close
> the file pointer.
>
> The file descriptors are kept until the owning task does close(2) and
> memory mapping ranges preserved until the owning task does munmap(2).
With the attached patch, this passes all your revoke tests on s390,
as well as ltp runalltests.sh.
thanks,
-serge
From: "Serge E. Hallyn" <[email protected]>
Subject: [PATCH] revoke: do s390 syscalls
revoke: do s390 syscalls
Signed-off-by: Serge E. Hallyn <[email protected]>
(cherry picked from 9962b52d7bd7b70b99e7515d30b8217b0de491b3 commit)
---
arch/s390/kernel/compat_wrapper.S | 11 +++++++++++
arch/s390/kernel/syscalls.S | 2 ++
include/asm-s390/unistd.h | 4 +++-
3 files changed, 16 insertions(+), 1 deletions(-)
3f0afa7b9a6b8c9ec930bdef73601f7260c79f47
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 71e54ef..b5c2bfa 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1665,3 +1665,14 @@ sys_getcpu_wrapper:
llgtr %r3,%r3 # unsigned *
llgtr %r4,%r4 # struct getcpu_cache *
jg sys_getcpu
+
+ .globl sys_revokeat_wrapper
+sys_revokeat_wrapper:
+ lgfr %r2,%r2 # int: sign-extend the 32-bit argument to 64 bits
+ llgtr %r3,%r3 # const char *: keep the low 31 address bits, zero the rest
+ jg sys_revokeat # jump on to the native sys_revokeat handler
+
+ .globl sys_frevoke_wrapper
+sys_frevoke_wrapper:
+ llgfr %r2,%r2 # unsigned int: zero-extend the 32-bit argument to 64 bits
+ jg sys_frevoke # jump on to the native sys_frevoke handler
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index a4ceae3..85a6673 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -321,3 +321,5 @@ SYSCALL(sys_vmsplice,sys_vmsplice,compat
NI_SYSCALL /* 310 sys_move_pages */
SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper)
SYSCALL(sys_epoll_pwait,sys_epoll_pwait,sys_ni_syscall)
+SYSCALL(sys_revokeat,sys_revokeat,sys_revokeat_wrapper)
+SYSCALL(sys_frevoke,sys_frevoke,sys_frevoke_wrapper)
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index fb6fef9..6651cb1 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -250,8 +250,10 @@
/* Number 310 is reserved for new sys_move_pages */
#define __NR_getcpu 311
#define __NR_epoll_pwait 312
+#define __NR_revokeat 313
+#define __NR_frevoke 314
-#define NR_syscalls 313
+#define NR_syscalls 315
/*
* There are some system calls that are not present on 64 bit, some
--
1.1.6
Hello,
> From: Pekka Enberg <[email protected]>
>
> The revokeat(2) and frevoke(2) system calls invalidate open file
> descriptors and shared mappings of an inode. After an successful
> revocation, operations on file descriptors fail with the EBADF or
> ENXIO error code for regular and device files,
> respectively. Attempting to read from or write to a revoked mapping
> causes SIGBUS.
>
> The actual operation is done in two passes:
>
> 1. Revoke all file descriptors that point to the given inode. We do
> this under tasklist_lock so that after this pass, we don't need
> to worry about racing with close(2) or dup(2).
>
> 2. Take down shared memory mappings of each revoked file and close
> the file pointer.
>
> The file descriptors are kept until the owning task does close(2) and
> memory mapping ranges preserved until the owning task does munmap(2).
>
> Signed-off-by: Pekka Enberg <[email protected]>
Have you considered using a hack similar to the one in bad_inode.c instead of
revoked_inode.c?
> Index: 2.6/fs/revoked_inode.c
> ===================================================================
> --- /dev/null
> +++ 2.6/fs/revoked_inode.c
> @@ -0,0 +1,664 @@
> +/*
> + * linux/fs/revoked_inode.c
> + *
> + * Copyright (C) 1997, Stephen Tweedie
> + *
> + * Provide stub functions for revoked inodes
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/stat.h>
> +#include <linux/time.h>
> +#include <linux/smp_lock.h>
> +#include <linux/namei.h>
> +#include <linux/poll.h>
> +
> +static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_read(struct file *filp, char __user * buf,
> + size_t size, loff_t * ppos)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_write(struct file *filp, const char __user * buf,
> + size_t siz, loff_t * ppos)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_aio_read(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_aio_write(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_readdir(struct file *filp, void *dirent,
> + filldir_t filldir)
> +{
> + return -EBADF;
> +}
> +
> +static unsigned int revoked_file_poll(struct file *filp, poll_table * wait)
> +{
> + return POLLERR;
> +}
> +
> +static int revoked_file_ioctl(struct inode *inode, struct file *filp,
> + unsigned int cmd, unsigned long arg)
> +{
> + return -EBADF;
> +}
> +
> +static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd,
> + unsigned long arg)
> +{
> + return -EBADF;
> +}
> +
> +static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_open(struct inode *inode, struct file *filp)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_flush(struct file *file, fl_owner_t id)
> +{
> + return 0;
> +}
> +
> +static int revoked_file_release(struct inode *inode, struct file *filp)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_fsync(struct file *file, struct dentry *dentry,
> + int datasync)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_fasync(int fd, struct file *filp, int on)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos,
> + size_t count, read_actor_t actor,
> + void *target)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_sendpage(struct file *file, struct page *page,
> + int off, size_t len, loff_t * pos,
> + int more)
> +{
> + return -EBADF;
> +}
> +
> +static unsigned long revoked_file_get_unmapped_area(struct file *file,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long pgoff,
> + unsigned long flags)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_check_flags(int flags)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_dir_notify(struct file *file, unsigned long arg)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe,
> + struct file *out, loff_t * ppos,
> + size_t len, unsigned int flags)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos,
> + struct pipe_inode_info *pipe,
> + size_t len, unsigned int flags)
> +{
> + return -EBADF;
> +}
> +
> +static const struct file_operations revoked_file_ops = {
> + .llseek = revoked_file_llseek,
> + .read = revoked_file_read,
> + .write = revoked_file_write,
> + .aio_read = revoked_file_aio_read,
> + .aio_write = revoked_file_aio_write,
> + .readdir = revoked_file_readdir,
> + .poll = revoked_file_poll,
> + .ioctl = revoked_file_ioctl,
> + .unlocked_ioctl = revoked_file_unlocked_ioctl,
> + .compat_ioctl = revoked_file_compat_ioctl,
> + .mmap = revoked_file_mmap,
> + .open = revoked_file_open,
> + .flush = revoked_file_flush,
> + .release = revoked_file_release,
> + .fsync = revoked_file_fsync,
> + .aio_fsync = revoked_file_aio_fsync,
> + .fasync = revoked_file_fasync,
> + .lock = revoked_file_lock,
> + .sendfile = revoked_file_sendfile,
> + .sendpage = revoked_file_sendpage,
> + .get_unmapped_area = revoked_file_get_unmapped_area,
> + .check_flags = revoked_file_check_flags,
> + .dir_notify = revoked_file_dir_notify,
> + .flock = revoked_file_flock,
> + .splice_write = revoked_file_splice_write,
> + .splice_read = revoked_file_splice_read,
> +};
> +
> +static int revoked_inode_create(struct inode *dir, struct dentry *dentry,
> + int mode, struct nameidata *nd)
> +{
> + return -EBADF;
> +}
> +
> +static struct dentry *revoked_inode_lookup(struct inode *dir,
> + struct dentry *dentry,
> + struct nameidata *nd)
> +{
> + return ERR_PTR(-EBADF);
> +}
> +
> +static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir,
> + struct dentry *dentry)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry,
> + const char *symname)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry,
> + int mode)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry,
> + int mode, dev_t rdev)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_rename(struct inode *old_dir,
> + struct dentry *old_dentry,
> + struct inode *new_dir,
> + struct dentry *new_dentry)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer,
> + int buflen)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_permission(struct inode *inode, int mask,
> + struct nameidata *nd)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
> + struct kstat *stat)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_setxattr(struct dentry *dentry, const char *name,
> + const void *value, size_t size, int flags)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name,
> + void *buffer, size_t size)
> +{
> + return -EBADF;
> +}
> +
> +static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer,
> + size_t buffer_size)
> +{
> + return -EBADF;
> +}
> +
> +static int revoked_inode_removexattr(struct dentry *dentry, const char *name)
> +{
> + return -EBADF;
> +}
> +
> +static struct inode_operations revoked_inode_ops = {
> + .create = revoked_inode_create,
> + .lookup = revoked_inode_lookup,
> + .link = revoked_inode_link,
> + .unlink = revoked_inode_unlink,
> + .symlink = revoked_inode_symlink,
> + .mkdir = revoked_inode_mkdir,
> + .rmdir = revoked_inode_rmdir,
> + .mknod = revoked_inode_mknod,
> + .rename = revoked_inode_rename,
> + .readlink = revoked_inode_readlink,
> + /* follow_link must be no-op, otherwise unmounting this inode
> + won't work */
> + /* put_link returns void */
> + /* truncate returns void */
> + .permission = revoked_inode_permission,
> + .getattr = revoked_inode_getattr,
> + .setattr = revoked_inode_setattr,
> + .setxattr = revoked_inode_setxattr,
> + .getxattr = revoked_inode_getxattr,
> + .listxattr = revoked_inode_listxattr,
> + .removexattr = revoked_inode_removexattr,
> + /* truncate_range returns void */
> +};
> +
> +static loff_t revoked_special_file_llseek(struct file *file, loff_t offset,
> + int origin)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_read(struct file *filp, char __user * buf,
> + size_t size, loff_t * ppos)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_write(struct file *filp,
> + const char __user * buf, size_t siz,
> + loff_t * ppos)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_aio_read(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_aio_write(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_readdir(struct file *filp, void *dirent,
> + filldir_t filldir)
> +{
> + return -ENXIO;
> +}
> +
> +static unsigned int revoked_special_file_poll(struct file *filp,
> + poll_table * wait)
> +{
> + return POLLERR;
> +}
> +
> +static int revoked_special_file_ioctl(struct inode *inode, struct file *filp,
> + unsigned int cmd, unsigned long arg)
> +{
> + return -ENXIO;
> +}
> +
> +static long revoked_special_file_unlocked_ioctl(struct file *file, unsigned cmd,
> + unsigned long arg)
> +{
> + return -ENXIO;
> +}
> +
> +static long revoked_special_file_compat_ioctl(struct file *file,
> + unsigned int cmd,
> + unsigned long arg)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_mmap(struct file *file,
> + struct vm_area_struct *vma)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_open(struct inode *inode, struct file *filp)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_flush(struct file *file, fl_owner_t id)
> +{
> + return 0;
> +}
> +
> +static int revoked_special_file_release(struct inode *inode, struct file *filp)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_fsync(struct file *file, struct dentry *dentry,
> + int datasync)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_aio_fsync(struct kiocb *iocb, int datasync)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_fasync(int fd, struct file *filp, int on)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_lock(struct file *file, int cmd,
> + struct file_lock *fl)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_sendfile(struct file *in_file,
> + loff_t * ppos, size_t count,
> + read_actor_t actor, void *target)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_sendpage(struct file *file,
> + struct page *page, int off,
> + size_t len, loff_t * pos, int more)
> +{
> + return -ENXIO;
> +}
> +
> +static unsigned long revoked_special_file_get_unmapped_area(struct file *file,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long pgoff,
> + unsigned long flags)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_check_flags(int flags)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_dir_notify(struct file *file, unsigned long arg)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_file_flock(struct file *filp, int cmd,
> + struct file_lock *fl)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_splice_write(struct pipe_inode_info *pipe,
> + struct file *out,
> + loff_t * ppos, size_t len,
> + unsigned int flags)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_file_splice_read(struct file *in, loff_t * ppos,
> + struct pipe_inode_info *pipe,
> + size_t len, unsigned int flags)
> +{
> + return -ENXIO;
> +}
> +
> +static const struct file_operations revoked_special_file_ops = {
> + .llseek = revoked_special_file_llseek,
> + .read = revoked_special_file_read,
> + .write = revoked_special_file_write,
> + .aio_read = revoked_special_file_aio_read,
> + .aio_write = revoked_special_file_aio_write,
> + .readdir = revoked_special_file_readdir,
> + .poll = revoked_special_file_poll,
> + .ioctl = revoked_special_file_ioctl,
> + .unlocked_ioctl = revoked_special_file_unlocked_ioctl,
> + .compat_ioctl = revoked_special_file_compat_ioctl,
> + .mmap = revoked_special_file_mmap,
> + .open = revoked_special_file_open,
> + .flush = revoked_special_file_flush,
> + .release = revoked_special_file_release,
> + .fsync = revoked_special_file_fsync,
> + .aio_fsync = revoked_special_file_aio_fsync,
> + .fasync = revoked_special_file_fasync,
> + .lock = revoked_special_file_lock,
> + .sendfile = revoked_special_file_sendfile,
> + .sendpage = revoked_special_file_sendpage,
> + .get_unmapped_area = revoked_special_file_get_unmapped_area,
> + .check_flags = revoked_special_file_check_flags,
> + .dir_notify = revoked_special_file_dir_notify,
> + .flock = revoked_special_file_flock,
> + .splice_write = revoked_special_file_splice_write,
> + .splice_read = revoked_special_file_splice_read,
> +};
> +
> +static int revoked_special_inode_create(struct inode *dir,
> + struct dentry *dentry, int mode,
> + struct nameidata *nd)
> +{
> + return -ENXIO;
> +}
> +
> +static struct dentry *revoked_special_inode_lookup(struct inode *dir,
> + struct dentry *dentry,
> + struct nameidata *nd)
> +{
> + return ERR_PTR(-ENXIO);
> +}
> +
> +static int revoked_special_inode_link(struct dentry *old_dentry,
> + struct inode *dir, struct dentry *dentry)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_unlink(struct inode *dir,
> + struct dentry *dentry)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_symlink(struct inode *dir,
> + struct dentry *dentry,
> + const char *symname)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_mkdir(struct inode *dir, struct dentry *dentry,
> + int mode)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_rmdir(struct inode *dir, struct dentry *dentry)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_mknod(struct inode *dir, struct dentry *dentry,
> + int mode, dev_t rdev)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_rename(struct inode *old_dir,
> + struct dentry *old_dentry,
> + struct inode *new_dir,
> + struct dentry *new_dentry)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_readlink(struct dentry *dentry,
> + char __user * buffer, int buflen)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_permission(struct inode *inode, int mask,
> + struct nameidata *nd)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_getattr(struct vfsmount *mnt,
> + struct dentry *dentry,
> + struct kstat *stat)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_setattr(struct dentry *direntry,
> + struct iattr *attrs)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_setxattr(struct dentry *dentry,
> + const char *name, const void *value,
> + size_t size, int flags)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_inode_getxattr(struct dentry *dentry,
> + const char *name, void *buffer,
> + size_t size)
> +{
> + return -ENXIO;
> +}
> +
> +static ssize_t revoked_special_inode_listxattr(struct dentry *dentry,
> + char *buffer, size_t buffer_size)
> +{
> + return -ENXIO;
> +}
> +
> +static int revoked_special_inode_removexattr(struct dentry *dentry,
> + const char *name)
> +{
> + return -ENXIO;
> +}
> +
> +static struct inode_operations revoked_special_inode_ops = {
> + .create = revoked_special_inode_create,
> + .lookup = revoked_special_inode_lookup,
> + .link = revoked_special_inode_link,
> + .unlink = revoked_special_inode_unlink,
> + .symlink = revoked_special_inode_symlink,
> + .mkdir = revoked_special_inode_mkdir,
> + .rmdir = revoked_special_inode_rmdir,
> + .mknod = revoked_special_inode_mknod,
> + .rename = revoked_special_inode_rename,
> + .readlink = revoked_special_inode_readlink,
> + /* follow_link must be no-op, otherwise unmounting this inode
> + won't work */
> + /* put_link returns void */
> + /* truncate returns void */
> + .permission = revoked_special_inode_permission,
> + .getattr = revoked_special_inode_getattr,
> + .setattr = revoked_special_inode_setattr,
> + .setxattr = revoked_special_inode_setxattr,
> + .getxattr = revoked_special_inode_getxattr,
> + .listxattr = revoked_special_inode_listxattr,
> + .removexattr = revoked_special_inode_removexattr,
> + /* truncate_range returns void */
> +};
> +
> +void make_revoked_inode(struct inode *inode, int mode)
> +{
> + remove_inode_hash(inode);
> +
> + inode->i_mode = mode;
> + inode->i_atime = inode->i_mtime = inode->i_ctime =
> + current_fs_time(inode->i_sb);
> +
> + if (special_file(mode)) {
> + inode->i_op = &revoked_special_inode_ops;
> + inode->i_fop = &revoked_special_file_ops;
> + } else {
> + inode->i_op = &revoked_inode_ops;
> + inode->i_fop = &revoked_file_ops;
> + }
> +}
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
Honza
--
Jan Kara <[email protected]>
SuSE CR Labs
Hi Honza,
On Wed, 7 Feb 2007, Jan Kara wrote:
> Have you considered using similar hack as bad_inode.c instead of
> revoked_inode.c?
I am not sure what you mean, revoked_inode.c looks pretty much the same as
bad_inode.c in mainline...
Pekka
On Wed 07-02-07 12:50:34, Pekka J Enberg wrote:
> Hi Honza,
>
> On Wed, 7 Feb 2007, Jan Kara wrote:
> > Have you considered using similar hack as bad_inode.c instead of
> > revoked_inode.c?
>
> I am not sure what you mean, revoked_inode.c looks pretty much the same as
> bad_inode.c in mainline...
Oops, sorry for the confusion. I was looking at 2.6.19, where the code
looks different. In 2.6.20, obviously your method was preferred.
Honza
--
Jan Kara <[email protected]>
SuSE CR Labs
What's the status on this? I was surprised to see something so important
just go dead.
Alan
Hi Alan,
On 2/26/07, Alan <[email protected]> wrote:
> Whats the status on this, I was suprised to see something so important
> just go dead ?
It's not dead. You can find the latest patches here:
http://www.cs.helsinki.fi/u/penberg/linux/revoke/patches/
and user-space tests here:
http://www.cs.helsinki.fi/u/penberg/linux/revoke/utils/
What they are lacking is review so I am not sure how to proceed with
the patches.
Pekka
Pekka Enberg wrote:
> Hi Alan,
>
> On 2/26/07, Alan <[email protected]> wrote:
>> Whats the status on this, I was suprised to see something so important
>> just go dead ?
>
> It's not dead. You can find the latest patches here:
>
> http://www.cs.helsinki.fi/u/penberg/linux/revoke/patches/
>
> and user-space tests here:
>
> http://www.cs.helsinki.fi/u/penberg/linux/revoke/utils/
>
> What they are lacking is review so I am not sure how to proceed with
> the patches.
>
On this general subject, would it make sense to, for the future, have
system calls of the -at() form only, and specify that the f- system
calls take (fd, NULL) as the (file descriptor, filename) tuple?
-hpa
> I'm not sure. Turning, for example, the statat(dir_fd, name == NULL)
> error case into fstat(dir_fd) sounds like a way for apps, admittedly
> buggy ones, to be surprised. Maybe libc would be exptected to catch
> the error before performing the shared system call?
At that point, would it not be cheaper to have two system calls? The table
cost isn't very large.
Alan wrote:
>> I'm not sure. Turning, for example, the statat(dir_fd, name == NULL)
>> error case into fstat(dir_fd) sounds like a way for apps, admittedly
>> buggy ones, to be surprised. Maybe libc would be exptected to catch
>> the error before performing the shared system call?
>
> At that point would it not be cheaper to have two system calls, the table
> cost isn't very large.
It's not just the table, though, you need two entry points, but even
that isn't really all that big either, I guess.
-hpa
On Feb 26, 2007, at 13:46:21, H. Peter Anvin wrote:
> Alan wrote:
>>> I'm not sure. Turning, for example, the statat(dir_fd, name ==
>>> NULL) error case into fstat(dir_fd) sounds like a way for apps,
>>> admittedly buggy ones, to be surprised. Maybe libc would be
>>> exptected to catch the error before performing the shared system
>>> call?
>> At that point would it not be cheaper to have two system calls,
>> the table cost isn't very large.
>
> It's not just the table, though, you need two entry points, but
> even that isn't really all that big either, I guess.
Well, I suppose there are multiple possibilities for consolidation:
frevokeat(fd, "/foo/bar/baz") => normal frevokeat
frevokeat(-1, "/foo/bar/baz") => revoke("/foo/bar/baz");
frevokeat(fd, NULL) => frevoke(fd);
Neither of those would ordinarily be considered to do anything useful
and for new syscalls I can't see the possibility of breaking existing
programs. On the other hand, it's not like we have any problems with
the syscall tables getting too large.
Cheers,
Kyle Moffett