Nothing prevents a dentry found by path lookup before a return of
__debugfs_remove() to actually get opened after that return. Now, after
the return of __debugfs_remove(), there are no guarantees whatsoever
regarding the memory the corresponding inode's file_operations object
had been kept in.
Since __debugfs_remove() is seldomly invoked, usually from module exit
handlers only, the race is hard to trigger and the impact is very low.
A discussion of the problem outlined above as well as a suggested
solution can be found in the (sub-)thread rooted at
http://lkml.kernel.org/g/[email protected]
("Yet another pipe related oops.")
Basically, Greg KH suggests to introduce an intermediate fops and
Al Viro points out that a pointer to the original ones may be stored in
->d_fsdata.
Follow this line of reasoning:
- Add SRCU as a reverse dependency of DEBUG_FS.
- Introduce a srcu_struct object for the debugfs subsystem.
- In debugfs_create_file(), store a pointer to the original
file_operations object in ->d_fsdata.
- In __debugfs_remove(), clear out that dentry->d_fsdata member again.
debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace
period before returning to their caller.
- Introduce an intermediate file_operations object named
"debugfs_proxy_file_operations". It's ->open() functions checks,
under the protection of a SRCU read lock, whether the "original"
file_operations pointed to by ->d_fsdata is still valid and if so,
tries to acquire a reference on the owning module. On success, it sets
the file object's ->f_op to the original file_operations and forwards
the ongoing open() call to the original ->open().
- For clarity, rename the former debugfs_file_operations to
debugfs_noop_file_operations -- they are in no way canonical.
The choice of SRCU over "normal" RCU is justified by the fact, that the
former may also be used to protect ->i_private data from going away
during the execution of a file's readers and writers which may (and do)
sleep.
Signed-off-by: Nicolai Stange <[email protected]>
---
Applicable to the Linus tree.
The second of the two patches depends on the first one
fs/debugfs/file.c | 29 ++++++++++++++++++++++++++++-
fs/debugfs/inode.c | 16 +++++++++++++---
include/linux/debugfs.h | 6 +++++-
lib/Kconfig.debug | 1 +
4 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index d2ba12e..67df2c9 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -35,13 +35,40 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
return count;
}
-const struct file_operations debugfs_file_operations = {
+const struct file_operations debugfs_noop_file_operations = {
.read = default_read_file,
.write = default_write_file,
.open = simple_open,
.llseek = noop_llseek,
};
+static int proxy_open(struct inode *inode, struct file *filp)
+{
+ struct dentry * const dentry = filp->f_path.dentry;
+ const struct file_operations __rcu *rcu_real_fops;
+ const struct file_operations *real_fops;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&debugfs_srcu);
+ rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata;
+ real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu);
+ real_fops = fops_get(real_fops);
+ srcu_read_unlock(&debugfs_srcu, srcu_idx);
+
+ if (!real_fops)
+ return -ENOENT;
+
+ replace_fops(filp, real_fops);
+ if (filp->f_op->open)
+ return filp->f_op->open(inode, filp);
+
+ return 0;
+}
+
+const struct file_operations debugfs_proxy_file_operations = {
+ .open = proxy_open,
+};
+
static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0d..8ae2e1a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,6 +30,8 @@
#define DEBUGFS_DEFAULT_MODE 0700
+DEFINE_SRCU(debugfs_srcu);
+
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
@@ -340,8 +342,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
return failed_creating(dentry);
inode->i_mode = mode;
- inode->i_fop = fops ? fops : &debugfs_file_operations;
inode->i_private = data;
+
+ inode->i_fop = fops ? &debugfs_proxy_file_operations
+ : &debugfs_noop_file_operations;
+ rcu_assign_pointer(dentry->d_fsdata, (void *)fops);
+
d_instantiate(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
@@ -523,10 +529,12 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
if (simple_positive(dentry)) {
dget(dentry);
- if (d_is_dir(dentry))
+ if (d_is_dir(dentry)) {
ret = simple_rmdir(d_inode(parent), dentry);
- else
+ } else {
simple_unlink(d_inode(parent), dentry);
+ rcu_assign_pointer(dentry->d_fsdata, NULL);
+ }
if (!ret)
d_delete(dentry);
dput(dentry);
@@ -565,6 +573,7 @@ void debugfs_remove(struct dentry *dentry)
mutex_unlock(&d_inode(parent)->i_mutex);
if (!ret)
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ synchronize_srcu(&debugfs_srcu);
}
EXPORT_SYMBOL_GPL(debugfs_remove);
@@ -642,6 +651,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
mutex_unlock(&d_inode(parent)->i_mutex);
+ synchronize_srcu(&debugfs_srcu);
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 19c066d..f8c7494 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -19,6 +19,7 @@
#include <linux/seq_file.h>
#include <linux/types.h>
+#include <linux/srcu.h>
struct device;
struct file_operations;
@@ -44,7 +45,10 @@ extern struct dentry *arch_debugfs_dir;
#if defined(CONFIG_DEBUG_FS)
/* declared over in file.c */
-extern const struct file_operations debugfs_file_operations;
+extern const struct file_operations debugfs_noop_file_operations;
+extern const struct file_operations debugfs_proxy_file_operations;
+
+extern struct srcu_struct debugfs_srcu;
struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8c15b29..3929878 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -257,6 +257,7 @@ config PAGE_OWNER
config DEBUG_FS
bool "Debug Filesystem"
+ select SRCU
help
debugfs is a virtual file system that kernel developers use to put
debugging files into. Enable this option to be able to read and
--
2.6.3
Upon return of debugfs_remove()/debugfs_remove_recursive(), it might
still be attempted to access associated private file data through
previously opened struct file objects. If that data has been freed by
the caller of debugfs_remove*() in the meanwhile, the reading/writing
process would either encounter a fault or, if the memory address in
question has been reassigned again, unrelated data structures could get
overwritten.
However, since debugfs files are seldomly removed, usually from module
exit handlers only, the impact is very low.
Since debugfs_remove() and debugfs_remove_recursive() are already
waiting for a SRCU grace period before returning to their callers,
enclosing the access to private file data from ->read() and ->write()
within a SRCU read-side critical section does the trick:
- Introduce the debugfs_file_use_data_start() and
debugfs_file_use_data_finish() helpers which just enter and leave
a SRCU read-side critical section. The former also reports whether the
file is still alive, that is if d_delete() has _not_ been called on
the corresponding dentry.
- Introduce the DEFINE_DEBUGFS_ATTRIBUTE() macro which is completely
equivalent to the DEFINE_SIMPLE_ATTRIBUTE() macro except that
->read() and ->write are set to SRCU protecting wrappers around the
original simple_read() and simple_write() helpers.
- Use that DEFINE_DEBUGFS_ATTRIBUTE() macro for all debugfs_create_*()
attribute creation variants where appropriate.
- Manually introduce SRCU protection to the debugfs-predefined readers
and writers not covered by the above DEFINE_SIMPLE_ATTRIBUTE()->
DEFINE_DEBUGFS_ATTRIBUTE() replacement.
Finally, it should be worth to note that in the vast majority of cases
where debugfs users are handing in a "custom" struct file_operations
object to debugfs_create_file(), an attribute's associated data's
lifetime is bound to the one of the containing module and thus,
taking a reference on ->owner during file opening acts as a proxy here.
There is no need to do a mass replace of DEFINE_SIMPLE_ATTRIBUTE() to
DEFINE_DEBUGFS_ATTRIBUTE() outside of debugfs.
Signed-off-by: Nicolai Stange <[email protected]>
---
Applicable to the Linus tree.
The second of the two patches depends on the first one
Documentation/DocBook/filesystems.tmpl | 1 +
fs/debugfs/file.c | 185 +++++++++++++++++++++++++--------
fs/debugfs/inode.c | 1 +
include/linux/debugfs.h | 66 ++++++++++++
4 files changed, 209 insertions(+), 44 deletions(-)
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 6006b63..1f2f6c0 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -99,6 +99,7 @@
<sect1 id="debugfs_interface"><title>debugfs interface</title>
!Efs/debugfs/inode.c
!Efs/debugfs/file.c
+!Iinclude/linux/debugfs.h
</sect1>
</chapter>
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 67df2c9..8e10695 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -69,6 +69,45 @@ const struct file_operations debugfs_proxy_file_operations = {
.open = proxy_open,
};
+int __debugfs_file_use_data_start(struct file *file, int *srcu_idx)
+ __acquires(&debugfs_srcu)
+{
+ *srcu_idx = srcu_read_lock(&debugfs_srcu);
+ if (d_unlinked(file->f_path.dentry))
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__debugfs_file_use_data_start);
+
+ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ ssize_t ret;
+ int srcu_idx;
+
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (!ret)
+ ret = simple_attr_read(file, buf, len, ppos);
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(debugfs_attr_read);
+
+ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ ssize_t ret;
+ int srcu_idx;
+
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (!ret)
+ ret = simple_attr_write(file, buf, len, ppos);
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(debugfs_attr_write);
+
static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
@@ -95,9 +134,9 @@ static int debugfs_u8_get(void *data, u64 *val)
*val = *(u8 *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
/**
* debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
@@ -141,9 +180,9 @@ static int debugfs_u16_get(void *data, u64 *val)
*val = *(u16 *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
/**
* debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value
@@ -187,9 +226,9 @@ static int debugfs_u32_get(void *data, u64 *val)
*val = *(u32 *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
/**
* debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value
@@ -234,9 +273,9 @@ static int debugfs_u64_get(void *data, u64 *val)
*val = *(u64 *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
/**
* debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value
@@ -281,9 +320,10 @@ static int debugfs_ulong_get(void *data, u64 *val)
*val = *(unsigned long *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set,
+ "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");
/**
* debugfs_create_ulong - create a debugfs file that is used to read and write
@@ -318,21 +358,24 @@ struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_ulong);
-DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set,
+ "0x%04llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set,
+ "0x%08llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set,
+ "0x%016llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n");
/*
* debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value
@@ -425,10 +468,10 @@ static int debugfs_size_t_get(void *data, u64 *val)
*val = *(size_t *)data;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
"%llu\n"); /* %llu and %zu are more or less the same */
-DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n");
/**
* debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value
@@ -458,10 +501,12 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
*val = atomic_read((atomic_t *)data);
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
debugfs_atomic_t_set, "%lld\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n");
-DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
+ "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
+ "%lld\n");
/**
* debugfs_create_atomic_t - create a debugfs file that is used to read and
@@ -487,11 +532,18 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
{
char buf[3];
bool *val = file->private_data;
+ int ret, srcu_idx;
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (ret) {
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
+ }
if (*val)
buf[0] = 'Y';
else
buf[0] = 'N';
+ debugfs_file_use_data_finish(srcu_idx);
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -505,16 +557,26 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
size_t buf_size;
bool bv;
bool *val = file->private_data;
+ ssize_t ret;
+ int srcu_idx;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
buf[buf_size] = '\0';
- if (strtobool(buf, &bv) == 0)
- *val = bv;
+ if (strtobool(buf, &bv) == 0) {
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (!ret) {
+ *val = bv;
+ ret = count;
+ }
+ debugfs_file_use_data_finish(srcu_idx);
+ } else {
+ return -EINVAL;
+ }
- return count;
+ return ret;
}
EXPORT_SYMBOL_GPL(debugfs_write_file_bool);
@@ -572,9 +634,16 @@ EXPORT_SYMBOL_GPL(debugfs_create_bool);
static ssize_t read_file_blob(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
+ ssize_t ret;
+ int srcu_idx;
struct debugfs_blob_wrapper *blob = file->private_data;
- return simple_read_from_buffer(user_buf, count, ppos, blob->data,
- blob->size);
+
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (!ret)
+ ret = simple_read_from_buffer(user_buf, count, ppos, blob->data,
+ blob->size);
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
}
static const struct file_operations fops_blob = {
@@ -643,6 +712,7 @@ static int u32_array_open(struct inode *inode, struct file *file)
struct array_data *data = inode->i_private;
int size, elements = data->elements;
char *buf;
+ int ret, srcu_idx;
/*
* Max size:
@@ -656,9 +726,17 @@ static int u32_array_open(struct inode *inode, struct file *file)
buf[size] = 0;
file->private_data = buf;
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (ret) {
+ kfree(file->private_data);
+ goto out;
+ }
u32_format_array(buf, size, data->array, data->elements);
+ nonseekable_open(inode, file);
- return nonseekable_open(inode, file);
+out:
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
}
static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
@@ -761,15 +839,21 @@ EXPORT_SYMBOL_GPL(debugfs_print_regs32);
static int debugfs_show_regset32(struct seq_file *s, void *data)
{
- struct debugfs_regset32 *regset = s->private;
+ struct file *f = s->private;
+ struct debugfs_regset32 *regset = file_inode(f)->i_private;
+ int ret, srcu_idx;
- debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
- return 0;
+ ret = debugfs_file_use_data_start(f, &srcu_idx);
+ if (!ret)
+ debugfs_print_regs32(s, regset->regs, regset->nregs,
+ regset->base, "");
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
}
static int debugfs_open_regset32(struct inode *inode, struct file *file)
{
- return single_open(file, debugfs_show_regset32, inode->i_private);
+ return single_open(file, debugfs_show_regset32, file);
}
static const struct file_operations fops_regset32 = {
@@ -826,11 +910,24 @@ static int debugfs_devm_entry_open(struct inode *inode, struct file *f)
return single_open(f, entry->read, entry->dev);
}
+static ssize_t debugfs_devm_entry_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ ssize_t ret;
+ int srcu_idx;
+
+ ret = debugfs_file_use_data_start(file, &srcu_idx);
+ if (!ret)
+ ret = seq_read(file, buf, size, ppos);
+ debugfs_file_use_data_finish(srcu_idx);
+ return ret;
+}
+
static const struct file_operations debugfs_devm_entry_ops = {
.owner = THIS_MODULE,
.open = debugfs_devm_entry_open,
.release = single_release,
- .read = seq_read,
+ .read = debugfs_devm_entry_read,
.llseek = seq_lseek
};
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8ae2e1a..37222c9 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -31,6 +31,7 @@
#define DEBUGFS_DEFAULT_MODE 0700
DEFINE_SRCU(debugfs_srcu);
+EXPORT_SYMBOL_GPL(debugfs_srcu);
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index f8c7494..ba1a299 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -20,6 +20,7 @@
#include <linux/types.h>
#include <linux/srcu.h>
+#include <linux/compiler.h>
struct device;
struct file_operations;
@@ -128,6 +129,71 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos);
+int __debugfs_file_use_data_start(struct file *file, int *srcu_idx)
+ __acquires(&debugfs_srcu);
+
+/**
+ * debugfs_file_use_data_start - mark the beginning of file data access
+ * @file: the file object whose data is being accessed.
+ * @srcu_idx: a pointer to some memory to store a SRCU index in.
+ *
+ * Up to a matching call to debugfs_file_use_data_finish(), any
+ * successive call into the file removing functions debugfs_remove()
+ * and debugfs_remove_recursive() will block. Since associated private
+ * file data may only get freed after a successful return of any of
+ * the removal functions, you may safely access it after a successful
+ * call to debugfs_file_use_data_start() without worrying about
+ * lifetime issues.
+ *
+ * If -%EIO is returned, the file has already been removed and thus,
+ * it is not safe to access any of its data. If, on the other hand,
+ * it is allowed to access the file data, zero is returned.
+ *
+ * Regardless of the return code, any call to
+ * debugfs_file_use_data_start() must be followed by a matching call
+ * to debugfs_file_use_data_finish().
+ */
+static inline int debugfs_file_use_data_start(struct file *file, int *srcu_idx)
+ __acquires(&debugfs_srcu)
+{
+ return __debugfs_file_use_data_start(file, srcu_idx);
+}
+
+/**
+ * debugfs_file_use_data_finish - mark the end of file data access
+ * @srcu_idx: the SRCU index "created" by a former call to
+ * debugfs_file_use_data_start().
+ *
+ * Allow any ongoing concurrent call into debugfs_remove() or
+ * debugfs_remove_recursive() blocked by a former call to
+ * debugfs_file_use_data_start() to proceed and return to its caller.
+ */
+static inline void debugfs_file_use_data_finish(int srcu_idx)
+ __releases(&debugfs_srcu)
+{
+ srcu_read_unlock(&debugfs_srcu, srcu_idx);
+}
+
+ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos);
+ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos);
+
+#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+ __simple_attr_check_format(__fmt, 0ull); \
+ return simple_attr_open(inode, file, __get, __set, __fmt); \
+} \
+static const struct file_operations __fops = { \
+ .owner = THIS_MODULE, \
+ .open = __fops ## _open, \
+ .release = simple_attr_release, \
+ .read = debugfs_attr_read, \
+ .write = debugfs_attr_write, \
+ .llseek = generic_file_llseek, \
+}
+
#else
#include <linux/err.h>
--
2.6.3
On Tue, Dec 01, 2015 at 12:21:31AM +0100, Nicolai Stange wrote:
> Nothing prevents a dentry found by path lookup before a return of
> __debugfs_remove() to actually get opened after that return. Now, after
> the return of __debugfs_remove(), there are no guarantees whatsoever
> regarding the memory the corresponding inode's file_operations object
> had been kept in.
>
> Since __debugfs_remove() is seldomly invoked, usually from module exit
> handlers only, the race is hard to trigger and the impact is very low.
>
> A discussion of the problem outlined above as well as a suggested
> solution can be found in the (sub-)thread rooted at
>
> http://lkml.kernel.org/g/[email protected]
> ("Yet another pipe related oops.")
>
> Basically, Greg KH suggests to introduce an intermediate fops and
> Al Viro points out that a pointer to the original ones may be stored in
> ->d_fsdata.
Nice work, thanks for doing this. I'll review it in a week or so when
I've caught up on my huge pending patch queue...
greg k-h
On Tue, Dec 01, 2015 at 12:21:31AM +0100, Nicolai Stange wrote:
> Nothing prevents a dentry found by path lookup before a return of
> __debugfs_remove() to actually get opened after that return. Now, after
> the return of __debugfs_remove(), there are no guarantees whatsoever
> regarding the memory the corresponding inode's file_operations object
> had been kept in.
>
> Since __debugfs_remove() is seldomly invoked, usually from module exit
> handlers only, the race is hard to trigger and the impact is very low.
>
> A discussion of the problem outlined above as well as a suggested
> solution can be found in the (sub-)thread rooted at
>
> http://lkml.kernel.org/g/[email protected]
> ("Yet another pipe related oops.")
>
> Basically, Greg KH suggests to introduce an intermediate fops and
> Al Viro points out that a pointer to the original ones may be stored in
> ->d_fsdata.
>
> Follow this line of reasoning:
> - Add SRCU as a reverse dependency of DEBUG_FS.
> - Introduce a srcu_struct object for the debugfs subsystem.
> - In debugfs_create_file(), store a pointer to the original
> file_operations object in ->d_fsdata.
> - In __debugfs_remove(), clear out that dentry->d_fsdata member again.
> debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace
> period before returning to their caller.
> - Introduce an intermediate file_operations object named
> "debugfs_proxy_file_operations". It's ->open() functions checks,
> under the protection of a SRCU read lock, whether the "original"
> file_operations pointed to by ->d_fsdata is still valid and if so,
> tries to acquire a reference on the owning module. On success, it sets
> the file object's ->f_op to the original file_operations and forwards
> the ongoing open() call to the original ->open().
> - For clarity, rename the former debugfs_file_operations to
> debugfs_noop_file_operations -- they are in no way canonical.
>
> The choice of SRCU over "normal" RCU is justified by the fact, that the
> former may also be used to protect ->i_private data from going away
> during the execution of a file's readers and writers which may (and do)
> sleep.
>
> Signed-off-by: Nicolai Stange <[email protected]>
One question below. Other than that, looks good from an RCU perspective.
Thanx, Paul
> ---
> Applicable to the Linus tree.
> The second of the two patches depends on the first one
>
> fs/debugfs/file.c | 29 ++++++++++++++++++++++++++++-
> fs/debugfs/inode.c | 16 +++++++++++++---
> include/linux/debugfs.h | 6 +++++-
> lib/Kconfig.debug | 1 +
> 4 files changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
> index d2ba12e..67df2c9 100644
> --- a/fs/debugfs/file.c
> +++ b/fs/debugfs/file.c
> @@ -35,13 +35,40 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
> return count;
> }
>
> -const struct file_operations debugfs_file_operations = {
> +const struct file_operations debugfs_noop_file_operations = {
> .read = default_read_file,
> .write = default_write_file,
> .open = simple_open,
> .llseek = noop_llseek,
> };
>
> +static int proxy_open(struct inode *inode, struct file *filp)
> +{
> + struct dentry * const dentry = filp->f_path.dentry;
> + const struct file_operations __rcu *rcu_real_fops;
> + const struct file_operations *real_fops;
> + int srcu_idx;
> +
> + srcu_idx = srcu_read_lock(&debugfs_srcu);
> + rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata;
> + real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu);
> + real_fops = fops_get(real_fops);
> + srcu_read_unlock(&debugfs_srcu, srcu_idx);
> +
> + if (!real_fops)
> + return -ENOENT;
> +
> + replace_fops(filp, real_fops);
> + if (filp->f_op->open)
> + return filp->f_op->open(inode, filp);
> +
> + return 0;
> +}
> +
> +const struct file_operations debugfs_proxy_file_operations = {
> + .open = proxy_open,
> +};
> +
> static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
> struct dentry *parent, void *value,
> const struct file_operations *fops,
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index b7fcc0d..8ae2e1a 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -30,6 +30,8 @@
>
> #define DEBUGFS_DEFAULT_MODE 0700
>
> +DEFINE_SRCU(debugfs_srcu);
> +
> static struct vfsmount *debugfs_mount;
> static int debugfs_mount_count;
> static bool debugfs_registered;
> @@ -340,8 +342,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
> return failed_creating(dentry);
>
> inode->i_mode = mode;
> - inode->i_fop = fops ? fops : &debugfs_file_operations;
> inode->i_private = data;
> +
> + inode->i_fop = fops ? &debugfs_proxy_file_operations
> + : &debugfs_noop_file_operations;
> + rcu_assign_pointer(dentry->d_fsdata, (void *)fops);
> +
> d_instantiate(dentry, inode);
> fsnotify_create(d_inode(dentry->d_parent), dentry);
> return end_creating(dentry);
> @@ -523,10 +529,12 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
>
> if (simple_positive(dentry)) {
> dget(dentry);
> - if (d_is_dir(dentry))
> + if (d_is_dir(dentry)) {
> ret = simple_rmdir(d_inode(parent), dentry);
> - else
> + } else {
> simple_unlink(d_inode(parent), dentry);
> + rcu_assign_pointer(dentry->d_fsdata, NULL);
> + }
> if (!ret)
> d_delete(dentry);
> dput(dentry);
> @@ -565,6 +573,7 @@ void debugfs_remove(struct dentry *dentry)
> mutex_unlock(&d_inode(parent)->i_mutex);
> if (!ret)
> simple_release_fs(&debugfs_mount, &debugfs_mount_count);
> + synchronize_srcu(&debugfs_srcu);
So the reason that this is OK from a performance perspective is that
people removing large quantities of debugfs entries are supposed to
use debugfs_remove_recursive()?
If there are valid use cases with lots of debugfs_remove() calls
in quick succession, and if this results in excessive latency due
to lots of synchronize_srcu() calls, then one approach would be to
have a debugfs_remove_nowait() or some such that omitted the final
synchronize_srcu(). The last call could use debugfs_remove() to clean
things up. There are of course a number of alternative approaches.
But if there is no performance issue with real workloads, then there
is of course no need to do anything.
> }
> EXPORT_SYMBOL_GPL(debugfs_remove);
>
> @@ -642,6 +651,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
> if (!__debugfs_remove(child, parent))
> simple_release_fs(&debugfs_mount, &debugfs_mount_count);
> mutex_unlock(&d_inode(parent)->i_mutex);
> + synchronize_srcu(&debugfs_srcu);
> }
> EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
>
> diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
> index 19c066d..f8c7494 100644
> --- a/include/linux/debugfs.h
> +++ b/include/linux/debugfs.h
> @@ -19,6 +19,7 @@
> #include <linux/seq_file.h>
>
> #include <linux/types.h>
> +#include <linux/srcu.h>
>
> struct device;
> struct file_operations;
> @@ -44,7 +45,10 @@ extern struct dentry *arch_debugfs_dir;
> #if defined(CONFIG_DEBUG_FS)
>
> /* declared over in file.c */
> -extern const struct file_operations debugfs_file_operations;
> +extern const struct file_operations debugfs_noop_file_operations;
> +extern const struct file_operations debugfs_proxy_file_operations;
> +
> +extern struct srcu_struct debugfs_srcu;
>
> struct dentry *debugfs_create_file(const char *name, umode_t mode,
> struct dentry *parent, void *data,
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 8c15b29..3929878 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -257,6 +257,7 @@ config PAGE_OWNER
>
> config DEBUG_FS
> bool "Debug Filesystem"
> + select SRCU
> help
> debugfs is a virtual file system that kernel developers use to put
> debugging files into. Enable this option to be able to read and
> --
> 2.6.3
>
On Tue, Dec 01, 2015 at 12:26:41AM +0100, Nicolai Stange wrote:
> Upon return of debugfs_remove()/debugfs_remove_recursive(), it might
> still be attempted to access associated private file data through
> previously opened struct file objects. If that data has been freed by
> the caller of debugfs_remove*() in the meanwhile, the reading/writing
> process would either encounter a fault or, if the memory address in
> question has been reassigned again, unrelated data structures could get
> overwritten.
>
> However, since debugfs files are seldomly removed, usually from module
> exit handlers only, the impact is very low.
>
> Since debugfs_remove() and debugfs_remove_recursive() are already
> waiting for a SRCU grace period before returning to their callers,
> enclosing the access to private file data from ->read() and ->write()
> within a SRCU read-side critical section does the trick:
> - Introduce the debugfs_file_use_data_start() and
> debugfs_file_use_data_finish() helpers which just enter and leave
> a SRCU read-side critical section. The former also reports whether the
> file is still alive, that is if d_delete() has _not_ been called on
> the corresponding dentry.
> - Introduce the DEFINE_DEBUGFS_ATTRIBUTE() macro which is completely
> equivalent to the DEFINE_SIMPLE_ATTRIBUTE() macro except that
> ->read() and ->write are set to SRCU protecting wrappers around the
> original simple_read() and simple_write() helpers.
> - Use that DEFINE_DEBUGFS_ATTRIBUTE() macro for all debugfs_create_*()
> attribute creation variants where appropriate.
> - Manually introduce SRCU protection to the debugfs-predefined readers
> and writers not covered by the above DEFINE_SIMPLE_ATTRIBUTE()->
> DEFINE_DEBUGFS_ATTRIBUTE() replacement.
>
> Finally, it should be worth to note that in the vast majority of cases
> where debugfs users are handing in a "custom" struct file_operations
> object to debugfs_create_file(), an attribute's associated data's
> lifetime is bound to the one of the containing module and thus,
> taking a reference on ->owner during file opening acts as a proxy here.
> There is no need to do a mass replace of DEFINE_SIMPLE_ATTRIBUTE() to
> DEFINE_DEBUGFS_ATTRIBUTE() outside of debugfs.
>
> Signed-off-by: Nicolai Stange <[email protected]>
Also looks good from an RCU perspective.
Thanx, Paul
> ---
> Applicable to the Linus tree.
> The second of the two patches depends on the first one
>
> Documentation/DocBook/filesystems.tmpl | 1 +
> fs/debugfs/file.c | 185 +++++++++++++++++++++++++--------
> fs/debugfs/inode.c | 1 +
> include/linux/debugfs.h | 66 ++++++++++++
> 4 files changed, 209 insertions(+), 44 deletions(-)
>
> diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
> index 6006b63..1f2f6c0 100644
> --- a/Documentation/DocBook/filesystems.tmpl
> +++ b/Documentation/DocBook/filesystems.tmpl
> @@ -99,6 +99,7 @@
> <sect1 id="debugfs_interface"><title>debugfs interface</title>
> !Efs/debugfs/inode.c
> !Efs/debugfs/file.c
> +!Iinclude/linux/debugfs.h
> </sect1>
> </chapter>
>
> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
> index 67df2c9..8e10695 100644
> --- a/fs/debugfs/file.c
> +++ b/fs/debugfs/file.c
> @@ -69,6 +69,45 @@ const struct file_operations debugfs_proxy_file_operations = {
> .open = proxy_open,
> };
>
> +int __debugfs_file_use_data_start(struct file *file, int *srcu_idx)
> + __acquires(&debugfs_srcu)
> +{
> + *srcu_idx = srcu_read_lock(&debugfs_srcu);
> + if (d_unlinked(file->f_path.dentry))
> + return -EIO;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(__debugfs_file_use_data_start);
> +
> +ssize_t debugfs_attr_read(struct file *file, char __user *buf,
> + size_t len, loff_t *ppos)
> +{
> + ssize_t ret;
> + int srcu_idx;
> +
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (!ret)
> + ret = simple_attr_read(file, buf, len, ppos);
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(debugfs_attr_read);
> +
> +ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
> + size_t len, loff_t *ppos)
> +{
> + ssize_t ret;
> + int srcu_idx;
> +
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (!ret)
> + ret = simple_attr_write(file, buf, len, ppos);
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(debugfs_attr_write);
> +
> static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
> struct dentry *parent, void *value,
> const struct file_operations *fops,
> @@ -95,9 +134,9 @@ static int debugfs_u8_get(void *data, u64 *val)
> *val = *(u8 *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
>
> /**
> * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
> @@ -141,9 +180,9 @@ static int debugfs_u16_get(void *data, u64 *val)
> *val = *(u16 *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
>
> /**
> * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value
> @@ -187,9 +226,9 @@ static int debugfs_u32_get(void *data, u64 *val)
> *val = *(u32 *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
>
> /**
> * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value
> @@ -234,9 +273,9 @@ static int debugfs_u64_get(void *data, u64 *val)
> *val = *(u64 *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
>
> /**
> * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value
> @@ -281,9 +320,10 @@ static int debugfs_ulong_get(void *data, u64 *val)
> *val = *(unsigned long *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set,
> + "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");
>
> /**
> * debugfs_create_ulong - create a debugfs file that is used to read and write
> @@ -318,21 +358,24 @@ struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
> }
> EXPORT_SYMBOL_GPL(debugfs_create_ulong);
>
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");
>
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set,
> + "0x%04llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");
>
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set,
> + "0x%08llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
>
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set,
> + "0x%016llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n");
>
> /*
> * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value
> @@ -425,10 +468,10 @@ static int debugfs_size_t_get(void *data, u64 *val)
> *val = *(size_t *)data;
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
> "%llu\n"); /* %llu and %zu are more or less the same */
> -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n");
>
> /**
> * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value
> @@ -458,10 +501,12 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
> *val = atomic_read((atomic_t *)data);
> return 0;
> }
> -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
> debugfs_atomic_t_set, "%lld\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n");
> -DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
> + "%lld\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
> + "%lld\n");
>
> /**
> * debugfs_create_atomic_t - create a debugfs file that is used to read and
> @@ -487,11 +532,18 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
> {
> char buf[3];
> bool *val = file->private_data;
> + int ret, srcu_idx;
>
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (ret) {
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> + }
> if (*val)
> buf[0] = 'Y';
> else
> buf[0] = 'N';
> + debugfs_file_use_data_finish(srcu_idx);
> buf[1] = '\n';
> buf[2] = 0x00;
> return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
> @@ -505,16 +557,26 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
> size_t buf_size;
> bool bv;
> bool *val = file->private_data;
> + ssize_t ret;
> + int srcu_idx;
>
> buf_size = min(count, (sizeof(buf)-1));
> if (copy_from_user(buf, user_buf, buf_size))
> return -EFAULT;
>
> buf[buf_size] = '\0';
> - if (strtobool(buf, &bv) == 0)
> - *val = bv;
> + if (strtobool(buf, &bv) == 0) {
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (!ret) {
> + *val = bv;
> + ret = count;
> + }
> + debugfs_file_use_data_finish(srcu_idx);
> + } else {
> + return -EINVAL;
> + }
>
> - return count;
> + return ret;
> }
> EXPORT_SYMBOL_GPL(debugfs_write_file_bool);
>
> @@ -572,9 +634,16 @@ EXPORT_SYMBOL_GPL(debugfs_create_bool);
> static ssize_t read_file_blob(struct file *file, char __user *user_buf,
> size_t count, loff_t *ppos)
> {
> + ssize_t ret;
> + int srcu_idx;
> struct debugfs_blob_wrapper *blob = file->private_data;
> - return simple_read_from_buffer(user_buf, count, ppos, blob->data,
> - blob->size);
> +
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (!ret)
> + ret = simple_read_from_buffer(user_buf, count, ppos, blob->data,
> + blob->size);
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> }
>
> static const struct file_operations fops_blob = {
> @@ -643,6 +712,7 @@ static int u32_array_open(struct inode *inode, struct file *file)
> struct array_data *data = inode->i_private;
> int size, elements = data->elements;
> char *buf;
> + int ret, srcu_idx;
>
> /*
> * Max size:
> @@ -656,9 +726,17 @@ static int u32_array_open(struct inode *inode, struct file *file)
> buf[size] = 0;
>
> file->private_data = buf;
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (ret) {
> + kfree(file->private_data);
> + goto out;
> + }
> u32_format_array(buf, size, data->array, data->elements);
> + nonseekable_open(inode, file);
>
> - return nonseekable_open(inode, file);
> +out:
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> }
>
> static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
> @@ -761,15 +839,21 @@ EXPORT_SYMBOL_GPL(debugfs_print_regs32);
>
> static int debugfs_show_regset32(struct seq_file *s, void *data)
> {
> - struct debugfs_regset32 *regset = s->private;
> + struct file *f = s->private;
> + struct debugfs_regset32 *regset = file_inode(f)->i_private;
> + int ret, srcu_idx;
>
> - debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
> - return 0;
> + ret = debugfs_file_use_data_start(f, &srcu_idx);
> + if (!ret)
> + debugfs_print_regs32(s, regset->regs, regset->nregs,
> + regset->base, "");
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> }
>
> static int debugfs_open_regset32(struct inode *inode, struct file *file)
> {
> - return single_open(file, debugfs_show_regset32, inode->i_private);
> + return single_open(file, debugfs_show_regset32, file);
> }
>
> static const struct file_operations fops_regset32 = {
> @@ -826,11 +910,24 @@ static int debugfs_devm_entry_open(struct inode *inode, struct file *f)
> return single_open(f, entry->read, entry->dev);
> }
>
> +static ssize_t debugfs_devm_entry_read(struct file *file, char __user *buf,
> + size_t size, loff_t *ppos)
> +{
> + ssize_t ret;
> + int srcu_idx;
> +
> + ret = debugfs_file_use_data_start(file, &srcu_idx);
> + if (!ret)
> + ret = seq_read(file, buf, size, ppos);
> + debugfs_file_use_data_finish(srcu_idx);
> + return ret;
> +}
> +
> static const struct file_operations debugfs_devm_entry_ops = {
> .owner = THIS_MODULE,
> .open = debugfs_devm_entry_open,
> .release = single_release,
> - .read = seq_read,
> + .read = debugfs_devm_entry_read,
> .llseek = seq_lseek
> };
>
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index 8ae2e1a..37222c9 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -31,6 +31,7 @@
> #define DEBUGFS_DEFAULT_MODE 0700
>
> DEFINE_SRCU(debugfs_srcu);
> +EXPORT_SYMBOL_GPL(debugfs_srcu);
>
> static struct vfsmount *debugfs_mount;
> static int debugfs_mount_count;
> diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
> index f8c7494..ba1a299 100644
> --- a/include/linux/debugfs.h
> +++ b/include/linux/debugfs.h
> @@ -20,6 +20,7 @@
>
> #include <linux/types.h>
> #include <linux/srcu.h>
> +#include <linux/compiler.h>
>
> struct device;
> struct file_operations;
> @@ -128,6 +129,71 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
> ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
> size_t count, loff_t *ppos);
>
> +int __debugfs_file_use_data_start(struct file *file, int *srcu_idx)
> + __acquires(&debugfs_srcu);
> +
> +/**
> + * debugfs_file_use_data_start - mark the beginning of file data access
> + * @file: the file object whose data is being accessed.
> + * @srcu_idx: a pointer to some memory to store a SRCU index in.
> + *
> + * Up to a matching call to debugfs_file_use_data_finish(), any
> + * successive call into the file removing functions debugfs_remove()
> + * and debugfs_remove_recursive() will block. Since associated private
> + * file data may only get freed after a successful return of any of
> + * the removal functions, you may safely access it after a successful
> + * call to debugfs_file_use_data_start() without worrying about
> + * lifetime issues.
> + *
> + * If -%EIO is returned, the file has already been removed and thus,
> + * it is not safe to access any of its data. If, on the other hand,
> + * it is allowed to access the file data, zero is returned.
> + *
> + * Regardless of the return code, any call to
> + * debugfs_file_use_data_start() must be followed by a matching call
> + * to debugfs_file_use_data_finish().
> + */
> +static inline int debugfs_file_use_data_start(struct file *file, int *srcu_idx)
> + __acquires(&debugfs_srcu)
> +{
> + return __debugfs_file_use_data_start(file, srcu_idx);
> +}
> +
> +/**
> + * debugfs_file_use_data_finish - mark the end of file data access
> + * @srcu_idx: the SRCU index "created" by a former call to
> + * debugfs_file_use_data_start().
> + *
> + * Allow any ongoing concurrent call into debugfs_remove() or
> + * debugfs_remove_recursive() blocked by a former call to
> + * debugfs_file_use_data_start() to proceed and return to its caller.
> + */
> +static inline void debugfs_file_use_data_finish(int srcu_idx)
> + __releases(&debugfs_srcu)
> +{
> + srcu_read_unlock(&debugfs_srcu, srcu_idx);
> +}
> +
> +ssize_t debugfs_attr_read(struct file *file, char __user *buf,
> + size_t len, loff_t *ppos);
> +ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
> + size_t len, loff_t *ppos);
> +
> +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
> +static int __fops ## _open(struct inode *inode, struct file *file) \
> +{ \
> + __simple_attr_check_format(__fmt, 0ull); \
> + return simple_attr_open(inode, file, __get, __set, __fmt); \
> +} \
> +static const struct file_operations __fops = { \
> + .owner = THIS_MODULE, \
> + .open = __fops ## _open, \
> + .release = simple_attr_release, \
> + .read = debugfs_attr_read, \
> + .write = debugfs_attr_write, \
> + .llseek = generic_file_llseek, \
> +}
> +
> #else
>
> #include <linux/err.h>
> --
> 2.6.3
>
"Paul E. McKenney" <[email protected]> writes:
> On Tue, Dec 01, 2015 at 12:21:31AM +0100, Nicolai Stange wrote:
>> Nothing prevents a dentry found by path lookup before a return of
>> __debugfs_remove() to actually get opened after that return. Now, after
>> the return of __debugfs_remove(), there are no guarantees whatsoever
>> regarding the memory the corresponding inode's file_operations object
>> had been kept in.
>>
>> Since __debugfs_remove() is seldomly invoked, usually from module exit
>> handlers only, the race is hard to trigger and the impact is very low.
>>
>> A discussion of the problem outlined above as well as a suggested
>> solution can be found in the (sub-)thread rooted at
>>
>> http://lkml.kernel.org/g/[email protected]
>> ("Yet another pipe related oops.")
>>
>> Basically, Greg KH suggests to introduce an intermediate fops and
>> Al Viro points out that a pointer to the original ones may be stored in
>> ->d_fsdata.
>>
>> Follow this line of reasoning:
>> - Add SRCU as a reverse dependency of DEBUG_FS.
>> - Introduce a srcu_struct object for the debugfs subsystem.
>> - In debugfs_create_file(), store a pointer to the original
>> file_operations object in ->d_fsdata.
>> - In __debugfs_remove(), clear out that dentry->d_fsdata member again.
>> debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace
>> period before returning to their caller.
>> - Introduce an intermediate file_operations object named
>> "debugfs_proxy_file_operations". It's ->open() functions checks,
>> under the protection of a SRCU read lock, whether the "original"
>> file_operations pointed to by ->d_fsdata is still valid and if so,
>> tries to acquire a reference on the owning module. On success, it sets
>> the file object's ->f_op to the original file_operations and forwards
>> the ongoing open() call to the original ->open().
>> - For clarity, rename the former debugfs_file_operations to
>> debugfs_noop_file_operations -- they are in no way canonical.
>>
>> The choice of SRCU over "normal" RCU is justified by the fact, that the
>> former may also be used to protect ->i_private data from going away
>> during the execution of a file's readers and writers which may (and do)
>> sleep.
>>
>> Signed-off-by: Nicolai Stange <[email protected]>
>
> One question below. Other than that, looks good from an RCU perspective.
>
> Thanx, Paul
>
>> ---
>> Applicable to the Linus tree.
>> The second of the two patches depends on the first one
>>
>> fs/debugfs/file.c | 29 ++++++++++++++++++++++++++++-
>> fs/debugfs/inode.c | 16 +++++++++++++---
>> include/linux/debugfs.h | 6 +++++-
>> lib/Kconfig.debug | 1 +
>> 4 files changed, 47 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
>> index d2ba12e..67df2c9 100644
>> --- a/fs/debugfs/file.c
>> +++ b/fs/debugfs/file.c
>> @@ -35,13 +35,40 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
>> return count;
>> }
>>
>> -const struct file_operations debugfs_file_operations = {
>> +const struct file_operations debugfs_noop_file_operations = {
>> .read = default_read_file,
>> .write = default_write_file,
>> .open = simple_open,
>> .llseek = noop_llseek,
>> };
>>
>> +static int proxy_open(struct inode *inode, struct file *filp)
>> +{
>> + struct dentry * const dentry = filp->f_path.dentry;
>> + const struct file_operations __rcu *rcu_real_fops;
>> + const struct file_operations *real_fops;
>> + int srcu_idx;
>> +
>> + srcu_idx = srcu_read_lock(&debugfs_srcu);
>> + rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata;
>> + real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu);
>> + real_fops = fops_get(real_fops);
>> + srcu_read_unlock(&debugfs_srcu, srcu_idx);
>> +
>> + if (!real_fops)
>> + return -ENOENT;
>> +
>> + replace_fops(filp, real_fops);
>> + if (filp->f_op->open)
>> + return filp->f_op->open(inode, filp);
>> +
>> + return 0;
>> +}
>> +
>> +const struct file_operations debugfs_proxy_file_operations = {
>> + .open = proxy_open,
>> +};
>> +
>> static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
>> struct dentry *parent, void *value,
>> const struct file_operations *fops,
>> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
>> index b7fcc0d..8ae2e1a 100644
>> --- a/fs/debugfs/inode.c
>> +++ b/fs/debugfs/inode.c
>> @@ -30,6 +30,8 @@
>>
>> #define DEBUGFS_DEFAULT_MODE 0700
>>
>> +DEFINE_SRCU(debugfs_srcu);
>> +
>> static struct vfsmount *debugfs_mount;
>> static int debugfs_mount_count;
>> static bool debugfs_registered;
>> @@ -340,8 +342,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
>> return failed_creating(dentry);
>>
>> inode->i_mode = mode;
>> - inode->i_fop = fops ? fops : &debugfs_file_operations;
>> inode->i_private = data;
>> +
>> + inode->i_fop = fops ? &debugfs_proxy_file_operations
>> + : &debugfs_noop_file_operations;
>> + rcu_assign_pointer(dentry->d_fsdata, (void *)fops);
>> +
>> d_instantiate(dentry, inode);
>> fsnotify_create(d_inode(dentry->d_parent), dentry);
>> return end_creating(dentry);
>> @@ -523,10 +529,12 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
>>
>> if (simple_positive(dentry)) {
>> dget(dentry);
>> - if (d_is_dir(dentry))
>> + if (d_is_dir(dentry)) {
>> ret = simple_rmdir(d_inode(parent), dentry);
>> - else
>> + } else {
>> simple_unlink(d_inode(parent), dentry);
>> + rcu_assign_pointer(dentry->d_fsdata, NULL);
>> + }
>> if (!ret)
>> d_delete(dentry);
>> dput(dentry);
>> @@ -565,6 +573,7 @@ void debugfs_remove(struct dentry *dentry)
>> mutex_unlock(&d_inode(parent)->i_mutex);
>> if (!ret)
>> simple_release_fs(&debugfs_mount, &debugfs_mount_count);
>> + synchronize_srcu(&debugfs_srcu);
>
> So the reason that this is OK from a performance perspective is that
> people removing large quantities of debugfs entries are supposed to
> use debugfs_remove_recursive()?
>
First of all, thank you very much for your feedback!
And yes, that's the idea behind not putting a synchronize_srcu() at the
end of __debugfs_remove() but at its callers debugfs_remove() and
debugfs_remove_recursive(): amortizing a single SRCU grace period among
multiple removals in the latter case.
I did not systematically investigate whether debugfs_remove_recursive()
is consistently used in preference of multiple debugfs_remove() calls
across the tree though. My subjective impression is that most debugfs
users I looked at are using the *_recursive() variant already.
> If there are valid use cases with lots of debugfs_remove() calls
> in quick succession,
The only thing I'm concerned about is system shutdown, provided module
exit handlers get called there. However, in my tests I did not encounter
any perceptible delays.
> and if this results in excessive latency due to
> lots of synchronize_srcu() calls, then one approach would be to have a
> debugfs_remove_nowait() or some such that omitted the final
> synchronize_srcu(). The last call could use debugfs_remove() to clean
> things up. There are of course a number of alternative approaches.
There is the not yet exported __debugfs_remove() already. If the need
arises, we could simply export that one.
> But if there is no performance issue with real workloads, then there
> is of course no need to do anything.
>
>> }
>> EXPORT_SYMBOL_GPL(debugfs_remove);
>>
>> @@ -642,6 +651,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
>> if (!__debugfs_remove(child, parent))
>> simple_release_fs(&debugfs_mount, &debugfs_mount_count);
>> mutex_unlock(&d_inode(parent)->i_mutex);
>> + synchronize_srcu(&debugfs_srcu);
>> }
>> EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
>>
>> diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
>> index 19c066d..f8c7494 100644
>> --- a/include/linux/debugfs.h
>> +++ b/include/linux/debugfs.h
>> @@ -19,6 +19,7 @@
>> #include <linux/seq_file.h>
>>
>> #include <linux/types.h>
>> +#include <linux/srcu.h>
>>
>> struct device;
>> struct file_operations;
>> @@ -44,7 +45,10 @@ extern struct dentry *arch_debugfs_dir;
>> #if defined(CONFIG_DEBUG_FS)
>>
>> /* declared over in file.c */
>> -extern const struct file_operations debugfs_file_operations;
>> +extern const struct file_operations debugfs_noop_file_operations;
>> +extern const struct file_operations debugfs_proxy_file_operations;
>> +
>> +extern struct srcu_struct debugfs_srcu;
>>
>> struct dentry *debugfs_create_file(const char *name, umode_t mode,
>> struct dentry *parent, void *data,
>> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
>> index 8c15b29..3929878 100644
>> --- a/lib/Kconfig.debug
>> +++ b/lib/Kconfig.debug
>> @@ -257,6 +257,7 @@ config PAGE_OWNER
>>
>> config DEBUG_FS
>> bool "Debug Filesystem"
>> + select SRCU
>> help
>> debugfs is a virtual file system that kernel developers use to put
>> debugging files into. Enable this option to be able to read and
>> --
>> 2.6.3
>>