Here's a series for fixing issues with d_drop on a directory dentry with
children and adding support for such dropped directories in fuse.
This one fixes a couple of issues I found with the previous incarnation and
split out the filesystem parts into individual patches.
Thanks,
Miklos
---
Anand Avati (1):
fuse: drop dentry on failed revalidate
Miklos Szeredi (8):
vfs: check submounts and drop atomically
vfs: check unlinked ancestors before mount
afs: use check_submounts_and_drop()
gfs2: use check_submounts_and_drop()
nfs: use check_submounts_and_drop()
sysfs: use check_submounts_and_drop()
fuse: use d_materialise_unique()
fuse: clean up return in fuse_dentry_revalidate()
---
fs/afs/dir.c | 10 +--
fs/dcache.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/fuse/dir.c | 97 ++++++++++++-------------
fs/gfs2/dentry.c | 9 +--
fs/internal.h | 1 +
fs/namespace.c | 9 +++
fs/nfs/dir.c | 9 ++-
fs/sysfs/dir.c | 20 +++---
include/linux/dcache.h | 1 +
9 files changed, 267 insertions(+), 81 deletions(-)
From: Miklos Szeredi <[email protected]>
We check submounts before doing d_drop() on a non-empty directory dentry in
NFS (have_submounts()), but we do not exclude a racing mount.
Process A: have_submounts() -> returns false
Process B: mount() -> success
Process A: d_drop()
This patch prepares the ground for the fix by doing the following
operations all under the same rename lock:
have_submounts()
shrink_dcache_parent()
d_drop()
This is actually an optimization since have_submounts() and
shrink_dcache_parent() both traverse the same dentry tree separately.
Signed-off-by: Miklos Szeredi <[email protected]>
CC: David Howells <[email protected]>
CC: Steven Whitehouse <[email protected]>
CC: Trond Myklebust <[email protected]>
CC: Greg Kroah-Hartman <[email protected]>
---
fs/dcache.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/dcache.h | 1 +
2 files changed, 158 insertions(+)
diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb53..020004d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1224,6 +1224,163 @@ void shrink_dcache_parent(struct dentry * parent)
}
EXPORT_SYMBOL(shrink_dcache_parent);
+static int __check_submounts_and_drop(struct dentry *parent,
+ struct list_head *dispose)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+ unsigned seq;
+ int found = 0;
+ int locked = 0;
+
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry;
+
+ dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (d_mountpoint(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ found = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * move only zero ref count dentries to the dispose list.
+ *
+ * Those which are presently on the shrink list, being processed
+ * by shrink_dentry_list(), shouldn't be moved. Otherwise the
+ * loop in shrink_dcache_parent() might not make any progress
+ * and loop forever.
+ */
+ if (dentry->d_count) {
+ dentry_lru_del(dentry);
+ } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+ dentry_lru_move_list(dentry, dispose);
+ dentry->d_flags |= DCACHE_SHRINK_LIST;
+ found++;
+ }
+ /*
+ * We can return to the caller if we have found some (this
+ * ensures forward progress). We'll be coming back to find
+ * the rest.
+ */
+ if (found && need_resched()) {
+ spin_unlock(&dentry->d_lock);
+ goto out;
+ }
+
+ /*
+ * Descend a level if the d_subdirs list is non-empty.
+ */
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ if (this_parent != parent) {
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
+ goto rename_retry;
+ next = child->d_u.d_child.next;
+ goto resume;
+ }
+ if (!locked && read_seqretry(&rename_lock, seq)) {
+ spin_unlock(&this_parent->d_lock);
+ goto rename_retry;
+ }
+ if (d_mountpoint(this_parent))
+ found = -EBUSY;
+ if (!found)
+ __d_drop(this_parent);
+out:
+ spin_unlock(&this_parent->d_lock);
+
+ if (locked)
+ write_sequnlock(&rename_lock);
+ return found;
+
+rename_retry:
+ if (found)
+ return found;
+ if (locked)
+ goto again;
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
+}
+
+/**
+ * check_submounts_and_drop - prune dcache, check for submounts and drop
+ *
+ * All done as a single atomic operation relative to has_unlinked_ancestor().
+ * Returns 0 if successfully unhashed @parent. If there were submounts then
+ * return -EBUSY.
+ *
+ * @dentry: dentry to prune and drop
+ */
+int check_submounts_and_drop(struct dentry *dentry)
+{
+ int ret = 0;
+
+ /* Negative dentries can be dropped without further checks */
+ if (!dentry->d_inode) {
+ d_drop(dentry);
+ goto out;
+ }
+
+ spin_lock(&dentry->d_lock);
+ if (d_unhashed(dentry))
+ goto out_unlock;
+ if (list_empty(&dentry->d_subdirs)) {
+ if (d_mountpoint(dentry)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ __d_drop(dentry);
+ goto out_unlock;
+ }
+ spin_unlock(&dentry->d_lock);
+
+ for (;;) {
+ LIST_HEAD(dispose);
+ ret = __check_submounts_and_drop(dentry, &dispose);
+ if (!list_empty(&dispose))
+ shrink_dentry_list(&dispose);
+
+ if (ret <= 0)
+ break;
+
+ cond_resched();
+ }
+
+out:
+ return ret;
+
+out_unlock:
+ spin_unlock(&dentry->d_lock);
+ goto out;
+}
+EXPORT_SYMBOL(check_submounts_and_drop);
+
/**
* __d_alloc - allocate a dcache entry
* @sb: filesystem it will belong to
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b90337c..41b21ca 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -251,6 +251,7 @@ extern void d_prune_aliases(struct inode *);
/* test whether we have any submounts in a subdir tree */
extern int have_submounts(struct dentry *);
+extern int check_submounts_and_drop(struct dentry *);
/*
* This adds the entry to the hash queues.
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
check_submounts_and_drop() can deal with negative dentries and
non-directories as well.
Non-directories can also be mounted on. And just like directories we don't
want these to disappear with invalidation.
Signed-off-by: Miklos Szeredi <[email protected]>
CC: Steven Whitehouse <[email protected]>
---
fs/gfs2/dentry.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index f2448ab..d3a5d4e 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -93,12 +93,9 @@ invalid_gunlock:
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
invalid:
- if (inode && S_ISDIR(inode->i_mode)) {
- if (have_submounts(dentry))
- goto valid;
- shrink_dcache_parent(dentry);
- }
- d_drop(dentry);
+ if (check_submounts_and_drop(dentry) != 0)
+ goto valid;
+
dput(parent);
return 0;
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
On errors unrelated to the filesystem's state (ENOMEM, ENOTCONN) return the
error itself from ->d_revalidate() insted of returning zero (invalid).
Also make a common label for invalidating the dentry. This will be used by
the next patch.
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 131d14b..25c6cfe 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -182,10 +182,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
struct inode *inode;
struct dentry *parent;
struct fuse_conn *fc;
+ int ret;
inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
- return 0;
+ goto invalid;
else if (fuse_dentry_time(entry) < get_jiffies_64()) {
int err;
struct fuse_entry_out outarg;
@@ -195,20 +196,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
/* For negative dentries, always do a fresh lookup */
if (!inode)
- return 0;
+ goto invalid;
+ ret = -ECHILD;
if (flags & LOOKUP_RCU)
- return -ECHILD;
+ goto out;
fc = get_fuse_conn(inode);
req = fuse_get_req_nopages(fc);
+ ret = PTR_ERR(req);
if (IS_ERR(req))
- return 0;
+ goto out;
forget = fuse_alloc_forget();
if (!forget) {
fuse_put_request(fc, req);
- return 0;
+ ret = -ENOMEM;
+ goto out;
}
attr_version = fuse_get_attr_version(fc);
@@ -227,7 +231,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
struct fuse_inode *fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
- return 0;
+ goto invalid;
}
spin_lock(&fc->lock);
fi->nlookup++;
@@ -235,7 +239,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
}
kfree(forget);
if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
- return 0;
+ goto invalid;
fuse_change_attributes(inode, &outarg.attr,
entry_attr_timeout(&outarg),
@@ -249,7 +253,13 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
dput(parent);
}
}
- return 1;
+ ret = 1;
+out:
+ return ret;
+
+invalid:
+ ret = 0;
+ goto out;
}
static int invalid_nodeid(u64 nodeid)
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
Use d_materialise_unique() instead of d_splice_alias(). This allows dentry
subtrees to be moved to a new place if there moved, even if something is
referencing a dentry in the subtree (open fd, cwd, etc..).
This will also allow us to drop a subtree if it is found to be replaced by
something else. In this case the disconnected subtree can later be
reconnected to its new location.
d_materialise_unique() ensures that a directory entry only ever has one
alias. We keep fc->inst_mutex around the calls for d_materialise_unique()
on directories to prevent a race with mkdir "stealing" the inode.
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 69 ++++++++++++++++++++++-------------------------------------
1 file changed, 26 insertions(+), 43 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 72a5d5b..131d14b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -267,26 +267,6 @@ int fuse_valid_type(int m)
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
-/*
- * Add a directory inode to a dentry, ensuring that no other dentry
- * refers to this inode. Called with fc->inst_mutex.
- */
-static struct dentry *fuse_d_add_directory(struct dentry *entry,
- struct inode *inode)
-{
- struct dentry *alias = d_find_alias(inode);
- if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
- /* This tries to shrink the subtree below alias */
- fuse_invalidate_entry(alias);
- dput(alias);
- if (!hlist_empty(&inode->i_dentry))
- return ERR_PTR(-EBUSY);
- } else {
- dput(alias);
- }
- return d_splice_alias(inode, entry);
-}
-
int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
@@ -345,6 +325,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
return err;
}
+static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
+ struct inode *inode)
+{
+ struct dentry *newent;
+
+ if (inode && S_ISDIR(inode->i_mode)) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ mutex_lock(&fc->inst_mutex);
+ newent = d_materialise_unique(dentry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ } else {
+ newent = d_materialise_unique(dentry, inode);
+ }
+
+ return newent;
+}
+
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
unsigned int flags)
{
@@ -352,7 +350,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
struct fuse_entry_out outarg;
struct inode *inode;
struct dentry *newent;
- struct fuse_conn *fc = get_fuse_conn(dir);
bool outarg_valid = true;
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
@@ -368,16 +365,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
if (inode && get_node_id(inode) == FUSE_ROOT_ID)
goto out_iput;
- if (inode && S_ISDIR(inode->i_mode)) {
- mutex_lock(&fc->inst_mutex);
- newent = fuse_d_add_directory(entry, inode);
- mutex_unlock(&fc->inst_mutex);
- err = PTR_ERR(newent);
- if (IS_ERR(newent))
- goto out_iput;
- } else {
- newent = d_splice_alias(inode, entry);
- }
+ newent = fuse_materialise_dentry(entry, inode);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_err;
entry = newent ? newent : entry;
if (outarg_valid)
@@ -1275,18 +1266,10 @@ static int fuse_direntplus_link(struct file *file,
if (!inode)
goto out;
- if (S_ISDIR(inode->i_mode)) {
- mutex_lock(&fc->inst_mutex);
- alias = fuse_d_add_directory(dentry, inode);
- mutex_unlock(&fc->inst_mutex);
- err = PTR_ERR(alias);
- if (IS_ERR(alias)) {
- iput(inode);
- goto out;
- }
- } else {
- alias = d_splice_alias(inode, dentry);
- }
+ alias = fuse_materialise_dentry(dentry, inode);
+ err = PTR_ERR(alias);
+ if (IS_ERR(alias))
+ goto out;
if (alias) {
dput(dentry);
--
1.8.1.4
From: Anand Avati <[email protected]>
Drop a subtree when we find that it has moved or been delated. This can be
done as long as there are no submounts under this location.
If the directory was moved and we come across the same directory in a
future lookup it will be reconnected by d_materialise_unique().
Signed-off-by: Anand Avati <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 25c6cfe..0e6961a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -259,6 +259,8 @@ out:
invalid:
ret = 0;
+ if (check_submounts_and_drop(entry) != 0)
+ ret = 1;
goto out;
}
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
check_submounts_and_drop() can deal with negative dentries and
non-directories as well.
Non-directories can also be mounted on. And just like directories we don't
want these to disappear with invalidation.
Signed-off-by: Miklos Szeredi <[email protected]>
CC: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e474ca2b..7468735 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1135,14 +1135,13 @@ out_zap_parent:
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
- /* If we have submounts, don't unhash ! */
- if (have_submounts(dentry))
- goto out_valid;
if (dentry->d_flags & DCACHE_DISCONNECTED)
goto out_valid;
- shrink_dcache_parent(dentry);
}
- d_drop(dentry);
+ /* If we have submounts, don't unhash ! */
+ if (check_submounts_and_drop(dentry) != 0)
+ goto out_valid;
+
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
__func__, dentry->d_parent->d_name.name,
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
check_submounts_and_drop() can deal with negative dentries and
non-directories as well.
Non-directories can also be mounted on. And just like directories we don't
want these to disappear with invalidation.
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/sysfs/dir.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..fde8856 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -297,7 +297,6 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
struct sysfs_dirent *sd;
- int is_dir;
int type;
if (flags & LOOKUP_RCU)
@@ -341,18 +340,15 @@ out_bad:
* is performed at its new name the dentry will be readded
* to the dcache hashes.
*/
- is_dir = (sysfs_type(sd) == SYSFS_DIR);
mutex_unlock(&sysfs_mutex);
- if (is_dir) {
- /* If we have submounts we must allow the vfs caches
- * to lie about the state of the filesystem to prevent
- * leaks and other nasty things.
- */
- if (have_submounts(dentry))
- goto out_valid;
- shrink_dcache_parent(dentry);
- }
- d_drop(dentry);
+
+ /* If we have submounts we must allow the vfs caches
+ * to lie about the state of the filesystem to prevent
+ * leaks and other nasty things.
+ */
+ if (check_submounts_and_drop(dentry) != 0)
+ goto out_valid;
+
return 0;
}
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
check_submounts_and_drop() can deal with negative dentries as well.
Signed-off-by: Miklos Szeredi <[email protected]>
CC: David Howells <[email protected]>
---
fs/afs/dir.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 34494fb..0b74d31 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -685,16 +685,12 @@ not_found:
spin_unlock(&dentry->d_lock);
out_bad:
- if (dentry->d_inode) {
- /* don't unhash if we have submounts */
- if (have_submounts(dentry))
- goto out_skip;
- }
+ /* don't unhash if we have submounts */
+ if (check_submounts_and_drop(dentry) != 0)
+ goto out_skip;
_debug("dropping dentry %s/%s",
parent->d_name.name, dentry->d_name.name);
- shrink_dcache_parent(dentry);
- d_drop(dentry);
dput(parent);
key_put(key);
--
1.8.1.4
From: Miklos Szeredi <[email protected]>
We check submounts before doing d_drop() on a non-empty directory dentry in
NFS (have_submounts()), but we do not exclude a racing mount. Nor do we
prevent mounts to be added to the disconnected subtree using relative paths
after the d_drop().
This patch fixes these issues by checking for unlinked (unhashed, non-root)
ancestors before proceeding with the mount. This is done after setting
DCACHE_MOUNTED on the soon-to-be mountpoint and with the rename seqlock
taken for write. This ensures that the only one of
check_submounts_and_drop() or has_unlinked_ancestor() can succeed.
Signed-off-by: Miklos Szeredi <[email protected]>
CC: David Howells <[email protected]>
CC: Steven Whitehouse <[email protected]>
CC: Trond Myklebust <[email protected]>
CC: Greg Kroah-Hartman <[email protected]>
---
fs/dcache.c | 35 +++++++++++++++++++++++++++++++++++
fs/internal.h | 1 +
fs/namespace.c | 9 +++++++++
3 files changed, 45 insertions(+)
diff --git a/fs/dcache.c b/fs/dcache.c
index 020004d..1333445 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1103,6 +1103,41 @@ rename_retry:
}
EXPORT_SYMBOL(have_submounts);
+static bool __has_unlinked_ancestor(struct dentry *dentry)
+{
+ struct dentry *this;
+
+ for (this = dentry; !IS_ROOT(this); this = this->d_parent) {
+ int is_unhashed;
+
+ /* Need exclusion wrt. check_submounts_and_drop() */
+ spin_lock(&this->d_lock);
+ is_unhashed = d_unhashed(this);
+ spin_unlock(&this->d_lock);
+
+ if (is_unhashed)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Called by mount code to check if the mountpoint is reachable (e.g. NFS can
+ * unhash a directory dentry and then the complete subtree can become
+ * unreachable).
+ */
+bool has_unlinked_ancestor(struct dentry *dentry)
+{
+ bool found;
+
+ /* Need exclusion wrt. check_submounts_and_drop() */
+ write_seqlock(&rename_lock);
+ found = __has_unlinked_ancestor(dentry);
+ write_sequnlock(&rename_lock);
+
+ return found;
+}
+
/*
* Search the dentry child list of the specified parent,
* and move any unused dentries to the end of the unused
diff --git a/fs/internal.h b/fs/internal.h
index 7c5f01c..d232355 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,6 +126,7 @@ extern int invalidate_inodes(struct super_block *, bool);
* dcache.c
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+extern bool has_unlinked_ancestor(struct dentry *dentry);
/*
* read_write.c
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..bb92a9c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -634,6 +634,15 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
}
dentry->d_flags |= DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
+
+ if (has_unlinked_ancestor(dentry)) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
+ kfree(mp);
+ return ERR_PTR(-ENOENT);
+ }
+
mp->m_dentry = dentry;
mp->m_count = 1;
list_add(&mp->m_hash, chain);
--
1.8.1.4
On Thu, 8 Aug 2013 17:24:42 +0200
Miklos Szeredi <[email protected]> wrote:
> From: Miklos Szeredi <[email protected]>
>
> We check submounts before doing d_drop() on a non-empty directory dentry in
> NFS (have_submounts()), but we do not exclude a racing mount.
>
> Process A: have_submounts() -> returns false
> Process B: mount() -> success
> Process A: d_drop()
>
> This patch prepares the ground for the fix by doing the following
> operations all under the same rename lock:
>
> have_submounts()
> shrink_dcache_parent()
> d_drop()
>
> This is actually an optimization since have_submounts() and
> shrink_dcache_parent() both traverse the same dentry tree separately.
>
> Signed-off-by: Miklos Szeredi <[email protected]>
> CC: David Howells <[email protected]>
> CC: Steven Whitehouse <[email protected]>
> CC: Trond Myklebust <[email protected]>
> CC: Greg Kroah-Hartman <[email protected]>
> ---
> fs/dcache.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/dcache.h | 1 +
> 2 files changed, 158 insertions(+)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 87bdb53..020004d 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1224,6 +1224,163 @@ void shrink_dcache_parent(struct dentry * parent)
> }
> EXPORT_SYMBOL(shrink_dcache_parent);
>
> +static int __check_submounts_and_drop(struct dentry *parent,
> + struct list_head *dispose)
> +{
> + struct dentry *this_parent;
> + struct list_head *next;
> + unsigned seq;
> + int found = 0;
> + int locked = 0;
> +
> + seq = read_seqbegin(&rename_lock);
> +again:
> + this_parent = parent;
> + spin_lock(&this_parent->d_lock);
> +repeat:
> + next = this_parent->d_subdirs.next;
> +resume:
> + while (next != &this_parent->d_subdirs) {
> + struct list_head *tmp = next;
> + struct dentry *dentry;
> +
> + dentry = list_entry(tmp, struct dentry, d_u.d_child);
> + next = tmp->next;
> +
> + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
> + if (d_mountpoint(dentry)) {
> + spin_unlock(&dentry->d_lock);
> + found = -EBUSY;
> + goto out;
> + }
> +
> + /*
> + * move only zero ref count dentries to the dispose list.
> + *
> + * Those which are presently on the shrink list, being processed
> + * by shrink_dentry_list(), shouldn't be moved. Otherwise the
> + * loop in shrink_dcache_parent() might not make any progress
> + * and loop forever.
> + */
> + if (dentry->d_count) {
> + dentry_lru_del(dentry);
> + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
> + dentry_lru_move_list(dentry, dispose);
> + dentry->d_flags |= DCACHE_SHRINK_LIST;
> + found++;
> + }
> + /*
> + * We can return to the caller if we have found some (this
> + * ensures forward progress). We'll be coming back to find
> + * the rest.
> + */
> + if (found && need_resched()) {
> + spin_unlock(&dentry->d_lock);
> + goto out;
> + }
> +
> + /*
> + * Descend a level if the d_subdirs list is non-empty.
> + */
> + if (!list_empty(&dentry->d_subdirs)) {
> + spin_unlock(&this_parent->d_lock);
> + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
> + this_parent = dentry;
> + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
> + goto repeat;
> + }
> +
> + spin_unlock(&dentry->d_lock);
> + }
> + /*
> + * All done at this level ... ascend and resume the search.
> + */
> + if (this_parent != parent) {
> + struct dentry *child = this_parent;
> + this_parent = try_to_ascend(this_parent, locked, seq);
> + if (!this_parent)
> + goto rename_retry;
> + next = child->d_u.d_child.next;
> + goto resume;
> + }
> + if (!locked && read_seqretry(&rename_lock, seq)) {
> + spin_unlock(&this_parent->d_lock);
> + goto rename_retry;
> + }
> + if (d_mountpoint(this_parent))
> + found = -EBUSY;
> + if (!found)
> + __d_drop(this_parent);
> +out:
> + spin_unlock(&this_parent->d_lock);
> +
> + if (locked)
> + write_sequnlock(&rename_lock);
> + return found;
> +
> +rename_retry:
> + if (found)
> + return found;
> + if (locked)
> + goto again;
> + locked = 1;
> + write_seqlock(&rename_lock);
> + goto again;
> +}
> +
> +/**
> + * check_submounts_and_drop - prune dcache, check for submounts and drop
> + *
> + * All done as a single atomic operation relative to has_unlinked_ancestor().
> + * Returns 0 if successfully unhashed @parent. If there were submounts then
> + * return -EBUSY.
> + *
> + * @dentry: dentry to prune and drop
> + */
> +int check_submounts_and_drop(struct dentry *dentry)
> +{
> + int ret = 0;
> +
> + /* Negative dentries can be dropped without further checks */
> + if (!dentry->d_inode) {
> + d_drop(dentry);
> + goto out;
> + }
> +
> + spin_lock(&dentry->d_lock);
> + if (d_unhashed(dentry))
> + goto out_unlock;
> + if (list_empty(&dentry->d_subdirs)) {
> + if (d_mountpoint(dentry)) {
> + ret = -EBUSY;
> + goto out_unlock;
> + }
> + __d_drop(dentry);
> + goto out_unlock;
> + }
> + spin_unlock(&dentry->d_lock);
> +
> + for (;;) {
> + LIST_HEAD(dispose);
> + ret = __check_submounts_and_drop(dentry, &dispose);
> + if (!list_empty(&dispose))
> + shrink_dentry_list(&dispose);
> +
> + if (ret <= 0)
> + break;
> +
> + cond_resched();
> + }
> +
> +out:
> + return ret;
> +
> +out_unlock:
> + spin_unlock(&dentry->d_lock);
> + goto out;
> +}
> +EXPORT_SYMBOL(check_submounts_and_drop);
> +
> /**
> * __d_alloc - allocate a dcache entry
> * @sb: filesystem it will belong to
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index b90337c..41b21ca 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -251,6 +251,7 @@ extern void d_prune_aliases(struct inode *);
>
> /* test whether we have any submounts in a subdir tree */
> extern int have_submounts(struct dentry *);
> +extern int check_submounts_and_drop(struct dentry *);
>
> /*
> * This adds the entry to the hash queues.
Nice work...
Looks reasonable overall. The only (extremely minor) nit I have is that
it might be nice to replace some of the direct struct list_head
accesses with helper macros.
Acked-by: Jeff Layton <[email protected]>
On Thu, 8 Aug 2013 17:24:43 +0200
Miklos Szeredi <[email protected]> wrote:
> From: Miklos Szeredi <[email protected]>
>
> We check submounts before doing d_drop() on a non-empty directory dentry in
> NFS (have_submounts()), but we do not exclude a racing mount. Nor do we
> prevent mounts to be added to the disconnected subtree using relative paths
> after the d_drop().
>
> This patch fixes these issues by checking for unlinked (unhashed, non-root)
> ancestors before proceeding with the mount. This is done after setting
> DCACHE_MOUNTED on the soon-to-be mountpoint and with the rename seqlock
> taken for write. This ensures that the only one of
> check_submounts_and_drop() or has_unlinked_ancestor() can succeed.
>
> Signed-off-by: Miklos Szeredi <[email protected]>
> CC: David Howells <[email protected]>
> CC: Steven Whitehouse <[email protected]>
> CC: Trond Myklebust <[email protected]>
> CC: Greg Kroah-Hartman <[email protected]>
> ---
> fs/dcache.c | 35 +++++++++++++++++++++++++++++++++++
> fs/internal.h | 1 +
> fs/namespace.c | 9 +++++++++
> 3 files changed, 45 insertions(+)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 020004d..1333445 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1103,6 +1103,41 @@ rename_retry:
> }
> EXPORT_SYMBOL(have_submounts);
>
> +static bool __has_unlinked_ancestor(struct dentry *dentry)
> +{
> + struct dentry *this;
> +
> + for (this = dentry; !IS_ROOT(this); this = this->d_parent) {
> + int is_unhashed;
> +
> + /* Need exclusion wrt. check_submounts_and_drop() */
> + spin_lock(&this->d_lock);
> + is_unhashed = d_unhashed(this);
> + spin_unlock(&this->d_lock);
> +
> + if (is_unhashed)
> + return true;
> + }
> + return false;
> +}
> +
> +/*
> + * Called by mount code to check if the mountpoint is reachable (e.g. NFS can
> + * unhash a directory dentry and then the complete subtree can become
> + * unreachable).
> + */
> +bool has_unlinked_ancestor(struct dentry *dentry)
> +{
> + bool found;
> +
> + /* Need exclusion wrt. check_submounts_and_drop() */
> + write_seqlock(&rename_lock);
> + found = __has_unlinked_ancestor(dentry);
> + write_sequnlock(&rename_lock);
> +
> + return found;
> +}
> +
> /*
> * Search the dentry child list of the specified parent,
> * and move any unused dentries to the end of the unused
> diff --git a/fs/internal.h b/fs/internal.h
> index 7c5f01c..d232355 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -126,6 +126,7 @@ extern int invalidate_inodes(struct super_block *, bool);
> * dcache.c
> */
> extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
> +extern bool has_unlinked_ancestor(struct dentry *dentry);
>
> /*
> * read_write.c
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 7b1ca9b..bb92a9c 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -634,6 +634,15 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
> }
> dentry->d_flags |= DCACHE_MOUNTED;
> spin_unlock(&dentry->d_lock);
> +
> + if (has_unlinked_ancestor(dentry)) {
> + spin_lock(&dentry->d_lock);
> + dentry->d_flags &= ~DCACHE_MOUNTED;
> + spin_unlock(&dentry->d_lock);
> + kfree(mp);
> + return ERR_PTR(-ENOENT);
> + }
> +
> mp->m_dentry = dentry;
> mp->m_count = 1;
> list_add(&mp->m_hash, chain);
Looks reasonable. For posterity, it might be worth mentioning the
potential regression that you described earlier (racing with rename
from another host). That way if someone hits it in the future they
might be able to zero in on this change as the culprit more easily.
Acked-by: Jeff Layton <[email protected]>
On Thu, 8 Aug 2013 17:24:46 +0200
Miklos Szeredi <[email protected]> wrote:
> From: Miklos Szeredi <[email protected]>
>
> Do have_submounts(), shrink_dcache_parent() and d_drop() atomically.
>
> check_submounts_and_drop() can deal with negative dentries and
> non-directories as well.
>
> Non-directories can also be mounted on. And just like directories we don't
> want these to disappear with invalidation.
>
> Signed-off-by: Miklos Szeredi <[email protected]>
> CC: Trond Myklebust <[email protected]>
> ---
> fs/nfs/dir.c | 9 ++++-----
> 1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e474ca2b..7468735 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -1135,14 +1135,13 @@ out_zap_parent:
> if (inode && S_ISDIR(inode->i_mode)) {
> /* Purge readdir caches. */
> nfs_zap_caches(inode);
> - /* If we have submounts, don't unhash ! */
> - if (have_submounts(dentry))
> - goto out_valid;
> if (dentry->d_flags & DCACHE_DISCONNECTED)
> goto out_valid;
> - shrink_dcache_parent(dentry);
> }
> - d_drop(dentry);
> + /* If we have submounts, don't unhash ! */
> + if (check_submounts_and_drop(dentry) != 0)
> + goto out_valid;
> +
> dput(parent);
> dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
> __func__, dentry->d_parent->d_name.name,
Acked-by: Jeff Layton <[email protected]>
On Thu, Aug 08, 2013 at 05:24:41PM +0200, Miklos Szeredi wrote:
> Here's a series for fixing issues with d_drop on a directory dentry with
> children and adding support for such dropped directories in fuse.
>
> This one fixes a couple of issues I found with the previous incarnation and
> split out the filesystem parts into individual patches.
OK in general, but I'm not happy with the proliferation of such iterators in
fs/dcache.c ;-/
We have have_submounts(), select_parent() and d_genocide(), with one more
such sucker added to the pile. Sure, most of the callers of have_submounts()
are gone after that patchset, but we still have several left:
fs/autofs4/dev-ioctl.c:542: err = have_submounts(path.dentry);
fs/autofs4/root.c:381: if (have_submounts(dentry)) {
fs/autofs4/waitq.c:338: if (have_submounts(dentry))
fs/nilfs2/super.c:1010: if (have_submounts(root_dentry))
The thing is, the same race with mount() does, AFAICS, apply at least to
autofs4 call sites.
And I would like to understand what nilfs one is trying to do...
Unless I'm seriously misreading that code, it's *not* on any kind of a hot
path, so I really wonder why don't we simply do shrink_dcache_parent() +
check if d_count has dropped to 1, without trying to look for submounts
first - if we have any, shrink_dcache_parent() is simply going to leave
us with d_count > 1 and that's it. Actually, it's cheaper that way -
no need to walk the tree twice. Moreover, checking for d_count == 1
case first is also pointless - in that case we have no children at all
and shrink_dcache_parent() will return immediately. Could nilfs folks
explain why (and if) we need to bother with all that complexity there?
We are really getting too many tree walkers in fs/dcache.c and
all that duplication is the prime breeding ground for bugs ;-/
On Wed, 21 Aug 2013 06:40:56 +0100, Al Viro wrote:
> And I would like to understand what nilfs one is trying to do...
> Unless I'm seriously misreading that code, it's *not* on any kind of a hot
> path, so I really wonder why don't we simply do shrink_dcache_parent() +
> check if d_count has dropped to 1, without trying to look for submounts
> first - if we have any, shrink_dcache_parent() is simply going to leave
> us with d_count > 1 and that's it. Actually, it's cheaper that way -
> no need to walk the tree twice.
> We are really getting too many tree walkers in fs/dcache.c and
> all that duplication is the prime breeding ground for bugs ;-/
I agree that we can eliminate have_submounts() from nilfs. Please
apply the following patch if you hope so.
> Moreover, checking for d_count == 1
> case first is also pointless - in that case we have no children at all
> and shrink_dcache_parent() will return immediately.
This also looks true. I will confirm whether we can remove the
pre-check for d_count == 1 case.
Regards,
Ryusuke Konishi
--
From: Ryusuke Konishi <[email protected]>
Date: Thu, 22 Aug 2013 01:53:03 +0900
Subject: [PATCH] nilfs2: do not use have_submounts()
The pre-check with have_submounts() in
nilfs_try_to_shrink_tree(root_dentry) is eliminable since the return
value of the function does not depend on whether the root_dentry has
sub-mounts or not. Removing the have_submount() pre-check may cause
unnecessary dcache shrinking, but the impact looks limited because
nilfs_try_to_shrink_tree() is not called in usual operation.
So, we remove use of have_submounts() from nilfs.
Signed-off-by: Ryusuke Konishi <[email protected]>
---
fs/nilfs2/super.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index af3ba04..38fe240 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1007,8 +1007,6 @@ static int nilfs_tree_was_touched(struct dentry *root_dentry)
*/
static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
{
- if (have_submounts(root_dentry))
- return true;
shrink_dcache_parent(root_dentry);
return nilfs_tree_was_touched(root_dentry);
}
--
1.7.9.3
On Thu, Aug 22, 2013 at 05:04:59AM +0900, Ryusuke Konishi wrote:
> On Wed, 21 Aug 2013 06:40:56 +0100, Al Viro wrote:
> > And I would like to understand what nilfs one is trying to do...
> > Unless I'm seriously misreading that code, it's *not* on any kind of a hot
> > path, so I really wonder why don't we simply do shrink_dcache_parent() +
> > check if d_count has dropped to 1, without trying to look for submounts
> > first - if we have any, shrink_dcache_parent() is simply going to leave
> > us with d_count > 1 and that's it. Actually, it's cheaper that way -
> > no need to walk the tree twice.
>
> > We are really getting too many tree walkers in fs/dcache.c and
> > all that duplication is the prime breeding ground for bugs ;-/
>
> I agree that we can eliminate have_submounts() from nilfs. Please
> apply the following patch if you hope so.
>
> > Moreover, checking for d_count == 1
> > case first is also pointless - in that case we have no children at all
> > and shrink_dcache_parent() will return immediately.
>
> This also looks true. I will confirm whether we can remove the
> pre-check for d_count == 1 case.
Umm... How about the following, then?
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index af3ba04..7ac2a12 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -994,23 +994,16 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
return ret;
}
-static int nilfs_tree_was_touched(struct dentry *root_dentry)
-{
- return d_count(root_dentry) > 1;
-}
-
/**
- * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint
+ * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint
* @root_dentry: root dentry of the tree to be shrunk
*
* This function returns true if the tree was in-use.
*/
-static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
+static bool nilfs_tree_is_busy(struct dentry *root_dentry)
{
- if (have_submounts(root_dentry))
- return true;
shrink_dcache_parent(root_dentry);
- return nilfs_tree_was_touched(root_dentry);
+ return d_count(root_dentry) > 1;
}
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
@@ -1034,8 +1027,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
if (inode) {
dentry = d_find_alias(inode);
if (dentry) {
- if (nilfs_tree_was_touched(dentry))
- ret = nilfs_try_to_shrink_tree(dentry);
+ ret = nilfs_tree_is_busy(dentry);
dput(dentry);
}
iput(inode);
@@ -1331,11 +1323,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
s->s_flags |= MS_ACTIVE;
} else if (!sd.cno) {
- int busy = false;
-
- if (nilfs_tree_was_touched(s->s_root)) {
- busy = nilfs_try_to_shrink_tree(s->s_root);
- if (busy && (flags ^ s->s_flags) & MS_RDONLY) {
+ if (nilfs_tree_is_busy(s->s_root)) {
+ if ((flags ^ s->s_flags) & MS_RDONLY) {
printk(KERN_ERR "NILFS: the device already "
"has a %s mount.\n",
(s->s_flags & MS_RDONLY) ?
@@ -1343,8 +1332,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
err = -EBUSY;
goto failed_super;
}
- }
- if (!busy) {
+ } else {
/*
* Try remount to setup mount states if the current
* tree is not mounted and only snapshots use this sb.
On Wed, 21 Aug 2013 22:00:38 +0100, Al Viro wrote:
> On Thu, Aug 22, 2013 at 05:04:59AM +0900, Ryusuke Konishi wrote:
>> On Wed, 21 Aug 2013 06:40:56 +0100, Al Viro wrote:
>> > And I would like to understand what nilfs one is trying to do...
>> > Unless I'm seriously misreading that code, it's *not* on any kind of a hot
>> > path, so I really wonder why don't we simply do shrink_dcache_parent() +
>> > check if d_count has dropped to 1, without trying to look for submounts
>> > first - if we have any, shrink_dcache_parent() is simply going to leave
>> > us with d_count > 1 and that's it. Actually, it's cheaper that way -
>> > no need to walk the tree twice.
>>
>> > We are really getting too many tree walkers in fs/dcache.c and
>> > all that duplication is the prime breeding ground for bugs ;-/
>>
>> I agree that we can eliminate have_submounts() from nilfs. Please
>> apply the following patch if you hope so.
>>
>> > Moreover, checking for d_count == 1
>> > case first is also pointless - in that case we have no children at all
>> > and shrink_dcache_parent() will return immediately.
>>
>> This also looks true. I will confirm whether we can remove the
>> pre-check for d_count == 1 case.
>
> Umm... How about the following, then?
>
I confirmed the pre-check is also eliminable.
Then, yes, the following change is ok.
Reviewed-by: Ryusuke Konishi <[email protected]>
Regards,
Ryusuke Konishi
> diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
> index af3ba04..7ac2a12 100644
> --- a/fs/nilfs2/super.c
> +++ b/fs/nilfs2/super.c
> @@ -994,23 +994,16 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
> return ret;
> }
>
> -static int nilfs_tree_was_touched(struct dentry *root_dentry)
> -{
> - return d_count(root_dentry) > 1;
> -}
> -
> /**
> - * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint
> + * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint
> * @root_dentry: root dentry of the tree to be shrunk
> *
> * This function returns true if the tree was in-use.
> */
> -static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
> +static bool nilfs_tree_is_busy(struct dentry *root_dentry)
> {
> - if (have_submounts(root_dentry))
> - return true;
> shrink_dcache_parent(root_dentry);
> - return nilfs_tree_was_touched(root_dentry);
> + return d_count(root_dentry) > 1;
> }
>
> int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
> @@ -1034,8 +1027,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
> if (inode) {
> dentry = d_find_alias(inode);
> if (dentry) {
> - if (nilfs_tree_was_touched(dentry))
> - ret = nilfs_try_to_shrink_tree(dentry);
> + ret = nilfs_tree_is_busy(dentry);
> dput(dentry);
> }
> iput(inode);
> @@ -1331,11 +1323,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
>
> s->s_flags |= MS_ACTIVE;
> } else if (!sd.cno) {
> - int busy = false;
> -
> - if (nilfs_tree_was_touched(s->s_root)) {
> - busy = nilfs_try_to_shrink_tree(s->s_root);
> - if (busy && (flags ^ s->s_flags) & MS_RDONLY) {
> + if (nilfs_tree_is_busy(s->s_root)) {
> + if ((flags ^ s->s_flags) & MS_RDONLY) {
> printk(KERN_ERR "NILFS: the device already "
> "has a %s mount.\n",
> (s->s_flags & MS_RDONLY) ?
> @@ -1343,8 +1332,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
> err = -EBUSY;
> goto failed_super;
> }
> - }
> - if (!busy) {
> + } else {
> /*
> * Try remount to setup mount states if the current
> * tree is not mounted and only snapshots use this sb.
Ian,
I'm having problems fully understanding what autofs4 is trying to do
with have_submounts().
> On Wed, 2013-08-21 at 06:40 +0100, Al Viro wrote:
> fs/autofs4/dev-ioctl.c:542: err = have_submounts(path.dentry);
This is an ioctl() asking whether we have anything mounted on the autofs
mount. Using have_submounts() and then a separate follow_down() looks
racy. have_submounts() could succeed and then follow_down() could fail.
Or the other way round. Shouldn't the two cases be handled separately
here? If the autofs is a just a simple trigger then use follow_down().
If it's a multi-mount thing, then use have_submounts().
What is the userspace automount daemon using this for? Do we really
need the recursive check for submounts?
> fs/autofs4/root.c:381: if (have_submounts(dentry)) {
Here it explicitly says it's for v5 and for rootless mutli-mount. So
for example:
/mnt/auto/ root of an indirect mount
/mnt/auto/foo directory with DCACHE_NEED_AUTOMOUNT
/mnt/auto/foo/bar directory without DCACHE_NEED_AUTOMOUNT
/mnt/auto/foo/bar/baz directory with an automount trigger mounted on it
In this case when d_automount for "foo" is called we don't call the
userspace daemon because things are mounted under foo. If there was no
trigger under baz, then we would try to handle "foo" as an indirect
mount and call userspace.
But it's pretty confusing. Do we really *ever* need to call automount
on "foo" if it was part of a multi-mount thing?
> fs/autofs4/waitq.c:338: if (have_submounts(dentry))
And here we re-validate the thing after taking another autofs4 lock.
Why this double checking?
Thanks,
Miklos
On Thu, 2013-08-29 at 05:51 +0200, Miklos Szeredi wrote:
> Ian,
>
> I'm having problems fully understanding what autofs4 is trying to do
> with have_submounts().
OK, I don't really care how I do it so I'm happy to change.
>
>
> > On Wed, 2013-08-21 at 06:40 +0100, Al Viro wrote:
>
> > fs/autofs4/dev-ioctl.c:542: err = have_submounts(path.dentry);
>
> This is an ioctl() asking whether we have anything mounted on the autofs
> mount. Using have_submounts() and then a separate follow_down() looks
> racy. have_submounts() could succeed and then follow_down() could fail.
> Or the other way round. Shouldn't the two cases be handled separately
> here? If the autofs is a just a simple trigger then use follow_down().
> If it's a multi-mount thing, then use have_submounts().
Right but IIRC I don't think I actually use the returned s_magic ATM but
I use the return of have_submounts() a lot.
>
> What is the userspace automount daemon using this for? Do we really
> need the recursive check for submounts?
>
>
> > fs/autofs4/root.c:381: if (have_submounts(dentry)) {
>
> Here it explicitly says it's for v5 and for rootless mutli-mount. So
> for example:
>
> /mnt/auto/ root of an indirect mount
or the root of direct mount for that matter.
> /mnt/auto/foo directory with DCACHE_NEED_AUTOMOUNT
> /mnt/auto/foo/bar directory without DCACHE_NEED_AUTOMOUNT
> /mnt/auto/foo/bar/baz directory with an automount trigger mounted on it
>
> In this case when d_automount for "foo" is called we don't call the
> userspace daemon because things are mounted under foo. If there was no
> trigger under baz, then we would try to handle "foo" as an indirect
> mount and call userspace.
>
> But it's pretty confusing. Do we really *ever* need to call automount
> on "foo" if it was part of a multi-mount thing?
That's right, the directory isn't simple_empty() so there's no callback.
The problem is we can't just use the fact that the directory is empty to
determine that there are no mounts at all underneath.
I understand your thinking, about deciding whether to callback to the
daemon, but that's not what the ioctl above is used for.
The main use is to be able to find out if the given directory is a
mountpoint as defined by the description in the comment above the
function. This saves having to scan the mount table to find out and is a
huge saving on systems with lots of mounts. In the past I've often
needed an answer the question "is this an autofs mount or some other
type" and that's why I stick s_magic in the return as well.
>
> > fs/autofs4/waitq.c:338: if (have_submounts(dentry))
>
> And here we re-validate the thing after taking another autofs4 lock.
> Why this double checking?
This is a different case and is often not in play at times when autofs
is checking if the directory is a "mountpoint". Such as when trying to
re-construct a tree of mounts at startup.
The check in waitq.c above "is" used to validate the need to callback to
the daemon to request a mount.
As I said, any suggestions how to achieve this without calling
have_submounts() are welcome.
Ian
On Fri, 2013-08-30 at 07:24 +0800, Ian Kent wrote:
> On Thu, 2013-08-29 at 05:51 +0200, Miklos Szeredi wrote:
> > Ian,
> >
> > I'm having problems fully understanding what autofs4 is trying to do
> > with have_submounts().
>
> OK, I don't really care how I do it so I'm happy to change.
>
> >
> >
> > > On Wed, 2013-08-21 at 06:40 +0100, Al Viro wrote:
> >
> > > fs/autofs4/dev-ioctl.c:542: err = have_submounts(path.dentry);
> >
> > This is an ioctl() asking whether we have anything mounted on the autofs
> > mount. Using have_submounts() and then a separate follow_down() looks
> > racy. have_submounts() could succeed and then follow_down() could fail.
> > Or the other way round. Shouldn't the two cases be handled separately
> > here? If the autofs is a just a simple trigger then use follow_down().
> > If it's a multi-mount thing, then use have_submounts().
>
> Right but IIRC I don't think I actually use the returned s_magic ATM but
> I use the return of have_submounts() a lot.
>
> >
> > What is the userspace automount daemon using this for? Do we really
> > need the recursive check for submounts?
> >
> >
> > > fs/autofs4/root.c:381: if (have_submounts(dentry)) {
> >
> > Here it explicitly says it's for v5 and for rootless mutli-mount. So
> > for example:
> >
> > /mnt/auto/ root of an indirect mount
>
> or the root of direct mount for that matter.
>
> > /mnt/auto/foo directory with DCACHE_NEED_AUTOMOUNT
> > /mnt/auto/foo/bar directory without DCACHE_NEED_AUTOMOUNT
> > /mnt/auto/foo/bar/baz directory with an automount trigger mounted on it
> >
> > In this case when d_automount for "foo" is called we don't call the
> > userspace daemon because things are mounted under foo. If there was no
> > trigger under baz, then we would try to handle "foo" as an indirect
> > mount and call userspace.
> >
> > But it's pretty confusing. Do we really *ever* need to call automount
> > on "foo" if it was part of a multi-mount thing?
>
> That's right, the directory isn't simple_empty() so there's no callback.
>
> The problem is we can't just use the fact that the directory is empty to
> determine that there are no mounts at all underneath.
>
> I understand your thinking, about deciding whether to callback to the
> daemon, but that's not what the ioctl above is used for.
>
> The main use is to be able to find out if the given directory is a
> mountpoint as defined by the description in the comment above the
> function. This saves having to scan the mount table to find out and is a
> huge saving on systems with lots of mounts. In the past I've often
> needed an answer the question "is this an autofs mount or some other
> type" and that's why I stick s_magic in the return as well.
>
> >
> > > fs/autofs4/waitq.c:338: if (have_submounts(dentry))
> >
> > And here we re-validate the thing after taking another autofs4 lock.
> > Why this double checking?
>
> This is a different case and is often not in play at times when autofs
> is checking if the directory is a "mountpoint". Such as when trying to
> re-construct a tree of mounts at startup.
>
> The check in waitq.c above "is" used to validate the need to callback to
> the daemon to request a mount.
>
> As I said, any suggestions how to achieve this without calling
> have_submounts() are welcome.
You know, may_umount_tree() would do this for me (I think) and would be
much less expensive ....
>
> Ian
>
On Fri, Aug 30, 2013 at 1:44 AM, Ian Kent <[email protected]> wrote:
>> The check in waitq.c above "is" used to validate the need to callback to
>> the daemon to request a mount.
Okay. But then shouldn't the check be "if (d_mountpoint(dentry)) valid = 0;" ?
Thanks,
Miklos
On Fri, 2013-08-30 at 10:59 +0200, Miklos Szeredi wrote:
> On Fri, Aug 30, 2013 at 1:44 AM, Ian Kent <[email protected]> wrote:
>
> >> The check in waitq.c above "is" used to validate the need to callback to
> >> the daemon to request a mount.
>
> Okay. But then shouldn't the check be "if (d_mountpoint(dentry)) valid = 0;" ?
I don't think so.
But again, may_umount() might do what's needed here too.
I don't think this is enough because it doesn't cover the case where the
dentry is not simple_empty() but has no mounts below. That's not a
normal use case but could happen if the daemon crashed at just the wrong
time, encountered an error condition that didn't allow it to cleanup
directories, or a user umounted triggers within the tree before starting
the daemon.
>
> Thanks,
> Miklos
On Sun, 2013-09-01 at 08:56 +0800, Ian Kent wrote:
> On Fri, 2013-08-30 at 10:59 +0200, Miklos Szeredi wrote:
> > On Fri, Aug 30, 2013 at 1:44 AM, Ian Kent <[email protected]> wrote:
> >
> > >> The check in waitq.c above "is" used to validate the need to callback to
> > >> the daemon to request a mount.
> >
> > Okay. But then shouldn't the check be "if (d_mountpoint(dentry)) valid = 0;" ?
>
> I don't think so.
>
> But again, may_umount() might do what's needed here too.
Oh, hang on, may_umount() can't be used for the root-less multi-mount
case at all, since there's no vfsmount at the base of the tree the check
can't be restricted to just the tree of subdirs that needs to be
checked. That's been a problem for me for a long time.
>
> I don't think this is enough because it doesn't cover the case where the
> dentry is not simple_empty() but has no mounts below. That's not a
> normal use case but could happen if the daemon crashed at just the wrong
> time, encountered an error condition that didn't allow it to cleanup
> directories, or a user umounted triggers within the tree before starting
> the daemon.
>
> >
> > Thanks,
> > Miklos
>