2013-07-11 11:07:45

by Li Zhong

[permalink] [raw]
Subject: [RFC PATCH 0/2] partial revoke implementation for procfs

Hi Al Viro,

I tried to implement what you proposed in:
https://lkml.org/lkml/2013/4/5/15

When you have time, would you please help to take a look at it, and give your
comments?
I am sending this draft early because I want to make sure that I didn't
misunderstand anything in your proposal, and that what I did and what I plan
to do next are not heading in the wrong direction.

This is only an initial draft that does only what procfs needs:

1. I didn't do anything about kick, mmap, fasync, ... or the other things you
mentioned in the following mails.

2. I only wrapped the f_ops calls in the vfs that are used by procfs (but maybe
I still missed something).

3. It seems all f_ops are const, so I couldn't easily clear the pointer of
->owner, maybe that needs every calling site of proc_create(_data) to make sure
the proc_fops doesn't have ->owner set?

Currently, I added an ugly check in __fput(), so that if ->f_revoke is set in
the file, we don't call fops_put(); and in proc_reg_open(), the old ->f_op is
restored if make_revokable() fails.

patch 1: adding the implementation proposed in your mail
patch 2: convert procfs to use this implementation

If there aren't any big issues, I plan to look for another file system
(with a backing device) to try the other things that are not implemented this time.

Thanks, Zhong

Li Zhong (2):
vfs: partial revoke implementation suggested by Al Viro
proc: convert procfs to use the general revoke implementation

fs/Makefile | 2 +-
fs/compat_ioctl.c | 8 +-
fs/eventpoll.c | 10 ++-
fs/file_table.c | 13 ++-
fs/ioctl.c | 7 +-
fs/proc/generic.c | 12 +--
fs/proc/inode.c | 229 ++++--------------------------------------------
fs/proc/internal.h | 9 +-
fs/read_write.c | 30 +++++--
fs/revoke.c | 133 ++++++++++++++++++++++++++++
fs/select.c | 11 ++-
include/linux/fs.h | 2 +
include/linux/revoke.h | 50 +++++++++++
mm/mmap.c | 8 +-
mm/nommu.c | 16 +++-
15 files changed, 297 insertions(+), 243 deletions(-)
create mode 100644 fs/revoke.c
create mode 100644 include/linux/revoke.h

--
1.7.9.5


2013-07-11 11:07:55

by Li Zhong

[permalink] [raw]
Subject: [PATCH 1/2] vfs: partial revoke implementation suggested by Al Viro

This patch tries to partially implement what Al Viro suggested in
https://lkml.org/lkml/2013/4/5/15

The code is also mostly copied from the above link.

Signed-off-by: Li Zhong <[email protected]>
---
fs/Makefile | 2 +-
fs/revoke.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 2 +
include/linux/revoke.h | 50 ++++++++++++++++++
4 files changed, 186 insertions(+), 1 deletion(-)
create mode 100644 fs/revoke.c
create mode 100644 include/linux/revoke.h

diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3..af0a622 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o revoke.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/revoke.c b/fs/revoke.c
new file mode 100644
index 0000000..bcda9ba
--- /dev/null
+++ b/fs/revoke.c
@@ -0,0 +1,133 @@
+#include <linux/revoke.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+bool __start_using(struct revoke *revoke)
+{
+ struct revokable *r;
+ rcu_read_lock();
+ r = rcu_dereference(revoke->revokable);
+ if (unlikely(!r)) {
+ rcu_read_unlock();
+ return false; /* revoked */
+ }
+
+ if (likely(atomic_inc_unless_negative(&r->in_use))) {
+ rcu_read_unlock();
+ return true; /* we are using it now */
+ }
+
+ rcu_read_unlock();
+ return false; /* it's being revoked right now */
+}
+
+#define BIAS (-1U<<31)
+
+void __stop_using(struct revoke *revoke)
+{
+ struct revokable *r;
+ r = rcu_dereference_protected(revoke->revokable, 1);
+ BUG_ON(!r);
+ if (atomic_dec_return(&r->in_use) == BIAS)
+ complete(r->c);
+}
+
+/* called with r->lock held by caller, unlocks it */
+static void __release_revoke(struct revokable *r, struct revoke *revoke)
+{
+ if (revoke->closing) {
+ DECLARE_COMPLETION_ONSTACK(c);
+ revoke->c = &c;
+ spin_unlock(&r->lock);
+ wait_for_completion(&c);
+ } else {
+ struct file *file;
+ revoke->closing = 1;
+ spin_unlock(&r->lock);
+ file = revoke->file;
+ if (file->f_op->release)
+ file->f_op->release(file_inode(file), file);
+ spin_lock(&r->lock);
+ hlist_del_init(&revoke->node);
+ rcu_assign_pointer(revoke->revokable, NULL);
+ rcu_read_lock(); /* prevent freeing of r */
+ if (revoke->c)
+ complete(revoke->c);
+ spin_unlock(&r->lock);
+ rcu_read_unlock();
+ }
+}
+
+void release_revoke(struct revoke *revoke)
+{
+ struct revokable *r;
+ rcu_read_lock();
+ r = rcu_dereference(revoke->revokable);
+ if (!r) {
+ /* already closed by revocation */
+ rcu_read_unlock();
+ goto out;
+ }
+
+ spin_lock(&r->lock);
+ if (unlikely(hlist_unhashed(&revoke->node))) {
+ /* just been revoked */
+ spin_unlock(&r->lock);
+ rcu_read_unlock();
+ goto out;
+ }
+
+ /*
+ * Ok, revoke_it() couldn't have been finished yet
+ * it'll have to get r->lock before it's through, so
+ * we can drop rcu_read_lock
+ */
+ rcu_read_unlock();
+ __release_revoke(r, revoke);
+out:
+ kfree(revoke);
+}
+
+void revoke_it(struct revokable *r)
+{
+ DECLARE_COMPLETION_ONSTACK(c);
+ r->c = &c;
+ if (atomic_add_return(BIAS, &r->in_use) != BIAS) {
+ if (r->kick)
+ r->kick(r);
+ wait_for_completion(&c);
+ }
+
+ while (1) {
+ struct hlist_node *p;
+ spin_lock(&r->lock);
+ p = r->list.first;
+ if (!p)
+ break;
+ __release_revoke(r, hlist_entry(p, struct revoke, node));
+ }
+ spin_unlock(&r->lock);
+}
+
+int make_revokable(struct file *f, struct revokable *r)
+{
+ struct revoke *revoke = kzalloc(sizeof(struct revoke), GFP_KERNEL);
+ if (!revoke)
+ return -ENOMEM;
+
+ if (!atomic_inc_unless_negative(&r->in_use)) {
+ kfree(revoke);
+ return -ENOENT;
+ }
+
+ revoke->file = f;
+ revoke->revokable = r;
+ f->f_revoke = revoke;
+
+ spin_lock(&r->lock);
+ hlist_add_head(&revoke->node, &r->list);
+ spin_unlock(&r->lock);
+
+ __stop_using(revoke);
+ return 0;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 99be011..4ec9437 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -45,6 +45,7 @@ struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
+struct revoke;

extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -807,6 +808,7 @@ struct file {
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
+ struct revoke *f_revoke;
};

struct file_handle {
diff --git a/include/linux/revoke.h b/include/linux/revoke.h
new file mode 100644
index 0000000..263569b
--- /dev/null
+++ b/include/linux/revoke.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_REVOKE_H
+#define _LINUX_REVOKE_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/completion.h>
+#include <linux/fs.h>
+
+struct revokable {
+ atomic_t in_use; /* number of threads in methods,*/
+ /* negative => going away */
+ spinlock_t lock;
+ struct hlist_head list; /* protected by ->lock, goes through */
+ /* struct revoke->node */
+ struct completion *c;
+ void (*kick)(struct revokable *);
+};
+
+struct revoke {
+ struct file *file;
+ struct revokable *revokable;
+ struct hlist_node node;
+ bool closing;
+ struct completion *c;
+};
+
+bool __start_using(struct revoke *revoke);
+void __stop_using(struct revoke *revoke);
+
+static inline bool start_using(struct file *f)
+{
+ struct revoke *revoke = f->f_revoke;
+ if (likely(!revoke))
+ return true; /* non-revokable file */
+ return __start_using(revoke);
+}
+
+static inline void stop_using(struct file *f)
+{
+ struct revoke *revoke = f->f_revoke;
+ if (unlikely(revoke))
+ __stop_using(revoke);
+}
+
+void release_revoke(struct revoke *revoke);
+void revoke_it(struct revokable *r);
+int make_revokable(struct file *f, struct revokable *r);
+
+#endif /* _LINUX_REVOKE_H */
--
1.7.9.5

2013-07-11 11:08:06

by Li Zhong

[permalink] [raw]
Subject: [PATCH 2/2] proc: convert procfs to use the general revoke implementation

This patch tries to replace the current revoke logic in procfs with the
implementation suggested by Al Viro in
https://lkml.org/lkml/2013/4/5/15

Below is the replacement guideline copied from that mail:

procfs would have struct revokable embedded into proc_dir_entry, with freeing
of those guys RCUd. It would set ->f_op to ->proc_fops and call
make_revokable(file, &pde->revokable) in proc_reg_open(); no wrappers for other
methods needed anymore. All file_operations instances fed to
proc_create() et.al. would lose ->owner - it's already not needed for those,
actually. remove_proc_entry()/remove_proc_subtree() would call revoke_it()
on everything we are removing.

Signed-off-by: Li Zhong <[email protected]>
---
fs/compat_ioctl.c | 8 +-
fs/eventpoll.c | 10 ++-
fs/file_table.c | 13 ++-
fs/ioctl.c | 7 +-
fs/proc/generic.c | 12 +--
fs/proc/inode.c | 229 +++++-----------------------------------------------
fs/proc/internal.h | 9 +--
fs/read_write.c | 30 +++++--
fs/select.c | 11 ++-
mm/mmap.c | 8 +-
mm/nommu.c | 16 +++-
11 files changed, 111 insertions(+), 242 deletions(-)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5d19acf..48da3bf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -110,6 +110,7 @@
#include <linux/dvb/video.h>

#include <linux/sort.h>
+#include <linux/revoke.h>

#ifdef CONFIG_SPARC
#include <asm/fbio.h>
@@ -1584,7 +1585,12 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,

default:
if (f.file->f_op && f.file->f_op->compat_ioctl) {
- error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
+ if (likely(start_using(f.file))) {
+ error = f.file->f_op->compat_ioctl(f.file,
+ cmd, arg);
+ stop_using(f.file);
+ } else
+ error = -ENOTTY;
if (error != -ENOIOCTLCMD)
goto out_fput;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9ad17b15..c73e8af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -42,6 +42,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
+#include <linux/revoke.h>

/*
* LOCKING:
@@ -776,9 +777,16 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)

static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt)
{
+ unsigned int rv = DEFAULT_POLLMASK;
pt->_key = epi->event.events;

- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events;
+ if (likely(start_using(epi->ffd.file))) {
+ rv = epi->ffd.file->f_op->poll(epi->ffd.file, pt)
+ & epi->event.events;
+ stop_using(epi->ffd.file);
+ }
+
+ return rv;
}

static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
diff --git a/fs/file_table.c b/fs/file_table.c
index 08e719b..b3d55e0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -26,6 +26,7 @@
#include <linux/hardirq.h>
#include <linux/task_work.h>
#include <linux/ima.h>
+#include <linux/revoke.h>

#include <linux/atomic.h>

@@ -244,14 +245,20 @@ static void __fput(struct file *file)
file->f_op->fasync(-1, file, 0);
}
ima_file_free(file);
- if (file->f_op && file->f_op->release)
- file->f_op->release(inode, file);
+ if (file->f_op && file->f_op->release) {
+ if (likely(!(file->f_revoke)))
+ file->f_op->release(inode, file);
+ else
+ release_revoke(file->f_revoke);
+ }
security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
}
- fops_put(file->f_op);
+ if (likely(!(file->f_revoke)))
+ fops_put(file->f_op);
+
put_pid(file->f_owner.pid);
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index fd507fb..7610377 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <linux/revoke.h>

#include <asm/ioctls.h>

@@ -40,7 +41,11 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
if (!filp->f_op || !filp->f_op->unlocked_ioctl)
goto out;

- error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+ if (likely(start_using(filp))) {
+ error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+ stop_using(filp);
+ }
+
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
out:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a4..3119937 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -23,6 +23,7 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
+#include <linux/revoke.h>
#include <asm/uaccess.h>

#include "internal.h"
@@ -367,13 +368,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
if (!ent)
goto out;

+ spin_lock_init(&ent->revokable.lock);
+ INIT_HLIST_HEAD(&ent->revokable.list);
+
memcpy(ent->name, fn, len + 1);
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
- spin_lock_init(&ent->pde_unload_lock);
- INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
}
@@ -488,7 +490,7 @@ static void free_proc_entry(struct proc_dir_entry *de)

if (S_ISLNK(de->mode))
kfree(de->data);
- kfree(de);
+ kfree_rcu(de, rcu);
}

void pde_put(struct proc_dir_entry *pde)
@@ -528,7 +530,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
return;
}

- proc_entry_rundown(de);
+ revoke_it(&de->revokable);

if (S_ISDIR(de->mode))
parent->nlink--;
@@ -577,7 +579,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
}
spin_unlock(&proc_subdir_lock);

- proc_entry_rundown(de);
+ revoke_it(&de->revokable);
next = de->parent;
if (S_ISDIR(de->mode))
next->nlink--;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 073aea6..17c0a66 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -12,7 +12,6 @@
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/completion.h>
-#include <linux/poll.h>
#include <linux/printk.h>
#include <linux/file.h>
#include <linux/limits.h>
@@ -23,6 +22,7 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>

@@ -130,168 +130,20 @@ static const struct super_operations proc_sops = {
.show_options = proc_show_options,
};

-enum {BIAS = -1U<<31};
-
-static inline int use_pde(struct proc_dir_entry *pde)
-{
- return atomic_inc_unless_negative(&pde->in_use);
-}
-
-static void unuse_pde(struct proc_dir_entry *pde)
-{
- if (atomic_dec_return(&pde->in_use) == BIAS)
- complete(pde->pde_unload_completion);
-}
-
-/* pde is locked */
-static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
-{
- if (pdeo->closing) {
- /* somebody else is doing that, just wait */
- DECLARE_COMPLETION_ONSTACK(c);
- pdeo->c = &c;
- spin_unlock(&pde->pde_unload_lock);
- wait_for_completion(&c);
- spin_lock(&pde->pde_unload_lock);
- } else {
- struct file *file;
- pdeo->closing = 1;
- spin_unlock(&pde->pde_unload_lock);
- file = pdeo->file;
- pde->proc_fops->release(file_inode(file), file);
- spin_lock(&pde->pde_unload_lock);
- list_del_init(&pdeo->lh);
- if (pdeo->c)
- complete(pdeo->c);
- kfree(pdeo);
- }
-}
-
-void proc_entry_rundown(struct proc_dir_entry *de)
-{
- DECLARE_COMPLETION_ONSTACK(c);
- /* Wait until all existing callers into module are done. */
- de->pde_unload_completion = &c;
- if (atomic_add_return(BIAS, &de->in_use) != BIAS)
- wait_for_completion(&c);
-
- spin_lock(&de->pde_unload_lock);
- while (!list_empty(&de->pde_openers)) {
- struct pde_opener *pdeo;
- pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
- close_pdeo(de, pdeo);
- }
- spin_unlock(&de->pde_unload_lock);
-}
-
-static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- loff_t rv = -EINVAL;
- if (use_pde(pde)) {
- loff_t (*llseek)(struct file *, loff_t, int);
- llseek = pde->proc_fops->llseek;
- if (!llseek)
- llseek = default_llseek;
- rv = llseek(file, offset, whence);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
-{
- ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
- if (use_pde(pde)) {
- read = pde->proc_fops->read;
- if (read)
- rv = read(file, buf, count, ppos);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
-{
- ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
- if (use_pde(pde)) {
- write = pde->proc_fops->write;
- if (write)
- rv = write(file, buf, count, ppos);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- unsigned int rv = DEFAULT_POLLMASK;
- unsigned int (*poll)(struct file *, struct poll_table_struct *);
- if (use_pde(pde)) {
- poll = pde->proc_fops->poll;
- if (poll)
- rv = poll(file, pts);
- unuse_pde(pde);
- }
- return rv;
-}
-
-static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- long rv = -ENOTTY;
- long (*ioctl)(struct file *, unsigned int, unsigned long);
- if (use_pde(pde)) {
- ioctl = pde->proc_fops->unlocked_ioctl;
- if (ioctl)
- rv = ioctl(file, cmd, arg);
- unuse_pde(pde);
- }
- return rv;
-}
-
-#ifdef CONFIG_COMPAT
-static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- long rv = -ENOTTY;
- long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
- if (use_pde(pde)) {
- compat_ioctl = pde->proc_fops->compat_ioctl;
- if (compat_ioctl)
- rv = compat_ioctl(file, cmd, arg);
- unuse_pde(pde);
- }
- return rv;
-}
-#endif
-
-static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- int rv = -EIO;
- int (*mmap)(struct file *, struct vm_area_struct *);
- if (use_pde(pde)) {
- mmap = pde->proc_fops->mmap;
- if (mmap)
- rv = mmap(file, vma);
- unuse_pde(pde);
- }
- return rv;
-}
-
static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
- struct pde_opener *pdeo;
+ const struct file_operations *old_fop = file->f_op;
+
+ open = pde->proc_fops->open;
+ release = pde->proc_fops->release;
+
+ file->f_op = pde->proc_fops;
+ if (open)
+ rv = open(inode, file);

/*
* What for, you ask? Well, we can have open, rmmod, remove_proc_entry
@@ -303,73 +155,28 @@ static int proc_reg_open(struct inode *inode, struct file *file)
* by hand in remove_proc_entry(). For this, save opener's credentials
* for later.
*/
- pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
- if (!pdeo)
- return -ENOMEM;
-
- if (!use_pde(pde)) {
- kfree(pdeo);
- return -ENOENT;
- }
- open = pde->proc_fops->open;
- release = pde->proc_fops->release;

- if (open)
- rv = open(inode, file);
+ if (!rv) {
+ rv = make_revokable(file, &pde->revokable);

- if (rv == 0 && release) {
- /* To know what to release. */
- pdeo->file = file;
- /* Strictly for "too late" ->release in proc_reg_release(). */
- spin_lock(&pde->pde_unload_lock);
- list_add(&pdeo->lh, &pde->pde_openers);
- spin_unlock(&pde->pde_unload_lock);
- } else
- kfree(pdeo);
-
- unuse_pde(pde);
- return rv;
-}
-
-static int proc_reg_release(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *pde = PDE(inode);
- struct pde_opener *pdeo;
- spin_lock(&pde->pde_unload_lock);
- list_for_each_entry(pdeo, &pde->pde_openers, lh) {
- if (pdeo->file == file) {
- close_pdeo(pde, pdeo);
- break;
+ if (rv) {
+ /* temporarily for ->owner issue */
+ file->f_op = old_fop;
+ if (release)
+ release(inode, file);
}
}
- spin_unlock(&pde->pde_unload_lock);
- return 0;
+
+ return rv;
}

static const struct file_operations proc_reg_file_ops = {
- .llseek = proc_reg_llseek,
- .read = proc_reg_read,
- .write = proc_reg_write,
- .poll = proc_reg_poll,
- .unlocked_ioctl = proc_reg_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = proc_reg_compat_ioctl,
-#endif
- .mmap = proc_reg_mmap,
.open = proc_reg_open,
- .release = proc_reg_release,
};

#ifdef CONFIG_COMPAT
static const struct file_operations proc_reg_file_ops_no_compat = {
- .llseek = proc_reg_llseek,
- .read = proc_reg_read,
- .write = proc_reg_write,
- .poll = proc_reg_poll,
- .unlocked_ioctl = proc_reg_unlocked_ioctl,
- .mmap = proc_reg_mmap,
.open = proc_reg_open,
- .release = proc_reg_release,
};
#endif

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 651d09a..6c6c0c9 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/binfmts.h>
+#include <linux/revoke.h>

struct ctl_table_header;
struct mempolicy;
@@ -41,11 +42,8 @@ struct proc_dir_entry {
struct proc_dir_entry *next, *parent, *subdir;
void *data;
atomic_t count; /* use count */
- atomic_t in_use; /* number of callers into module in progress; */
- /* negative -> it's going away RSN */
- struct completion *pde_unload_completion;
- struct list_head pde_openers; /* who did ->open, but not ->release */
- spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+ struct revokable revokable;
+ struct rcu_head rcu;
u8 namelen;
char name[];
};
@@ -208,7 +206,6 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *);
-extern void proc_entry_rundown(struct proc_dir_entry *);

/*
* proc_devtree.c
diff --git a/fs/read_write.c b/fs/read_write.c
index 122a384..b9ed1fd 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
+#include <linux/revoke.h>
#include "internal.h"

#include <asm/uaccess.h>
@@ -254,13 +255,20 @@ EXPORT_SYMBOL(default_llseek);
loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
loff_t (*fn)(struct file *, loff_t, int);
+ loff_t rv = -EINVAL;

fn = no_llseek;
if (file->f_mode & FMODE_LSEEK) {
if (file->f_op && file->f_op->llseek)
fn = file->f_op->llseek;
}
- return fn(file, offset, whence);
+
+ if (likely(start_using(file))) {
+ rv = fn(file, offset, whence);
+ stop_using(file);
+ }
+
+ return rv;
}
EXPORT_SYMBOL(vfs_llseek);

@@ -393,9 +401,13 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
count = ret;
- if (file->f_op->read)
- ret = file->f_op->read(file, buf, count, pos);
- else
+ if (file->f_op->read) {
+ if (likely(start_using(file))) {
+ ret = file->f_op->read(file, buf, count, pos);
+ stop_using(file);
+ } else
+ ret = -EIO;
+ } else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file);
@@ -471,9 +483,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
if (ret >= 0) {
count = ret;
file_start_write(file);
- if (file->f_op->write)
- ret = file->f_op->write(file, buf, count, pos);
- else
+ if (file->f_op->write) {
+ if (likely(start_using(file))) {
+ ret = file->f_op->write(file, buf, count, pos);
+ stop_using(file);
+ } else
+ ret = -EIO;
+ } else
ret = do_sync_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file);
diff --git a/fs/select.c b/fs/select.c
index 6b14dc7..16e1e37 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -28,6 +28,7 @@
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
#include <linux/freezer.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>

@@ -452,7 +453,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
- mask = (*f_op->poll)(f.file, wait);
+ if (likely(start_using(f.file))) {
+ mask = (*f_op->poll)(f.file, wait);
+ stop_using(f.file);
+ }
}
fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
@@ -733,7 +737,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
mask = DEFAULT_POLLMASK;
if (f.file->f_op && f.file->f_op->poll) {
pwait->_key = pollfd->events|POLLERR|POLLHUP;
- mask = f.file->f_op->poll(f.file, pwait);
+ if (likely(start_using(f.file))) {
+ mask = f.file->f_op->poll(f.file, pwait);
+ stop_using(f.file);
+ }
}
/* Mask out unneeded events. */
mask &= pollfd->events | POLLERR | POLLHUP;
diff --git a/mm/mmap.c b/mm/mmap.c
index 8468ffd..5988ebe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -36,6 +36,7 @@
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -1554,7 +1555,12 @@ munmap_back:
correct_wcount = 1;
}
vma->vm_file = get_file(file);
- error = file->f_op->mmap(file, vma);
+ if (likely(start_using(file))) {
+ error = file->f_op->mmap(file, vma);
+ stop_using(file);
+ } else
+ error = -EIO;
+
if (error)
goto unmap_and_free_vma;

diff --git a/mm/nommu.c b/mm/nommu.c
index e44e6e0..0853fe2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -30,6 +30,7 @@
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/sched/sysctl.h>
+#include <linux/revoke.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
@@ -1116,9 +1117,13 @@ static unsigned long determine_vm_flags(struct file *file,
*/
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
- int ret;
+ int ret = -EIO;
+
+ if (likely(start_using(vma->vm_file))) {
+ ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ stop_using(vma->vm_file);
+ }

- ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
return 0;
@@ -1143,14 +1148,17 @@ static int do_mmap_private(struct vm_area_struct *vma,
struct page *pages;
unsigned long total, point, n;
void *base;
- int ret, order;
+ int ret = -EIO, order;

/* invoke the file's mapping function so that it can keep track of
* shared mappings on devices or memory
* - VM_MAYSHARE will be set if it may attempt to share
*/
if (capabilities & BDI_CAP_MAP_DIRECT) {
- ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ if (likely(start_using(vma->vm_file))) {
+ ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+ stop_using(vma->vm_file);
+ }
if (ret == 0) {
/* shouldn't return success if we're not sharing */
BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
--
1.7.9.5