From: Erez Zadok <ezk@cs.sunysb.edu>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
       viro@zeniv.linux.org.uk, hch@infradead.org, miklos@szeredi.hu,
       Erez Zadok <ezk@cs.sunysb.edu>
Subject: [PATCH 08/19] Unionfs: lookup overhaul using vfs_path_lookup
Date: Tue, 29 Jul 2008 22:43:38 -0400
Message-Id: <12173858372941-git-send-email-ezk@cs.sunysb.edu>
In-Reply-To: <12173858291233-git-send-email-ezk@cs.sunysb.edu>
References: <12173858291233-git-send-email-ezk@cs.sunysb.edu>
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 16426
Lines: 520

Rework the lookup code to use vfs_path_lookup as much as possible, to ensure
that we have a vfsmount at this critical stage.  This is necessary for the
upcoming VFS API change from vfs_* to path_* methods.  This change also
allows unionfs to cross bind mounts and other mounts on lower branches.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/commonfops.c |    3 +-
 fs/unionfs/inode.c      |   18 ++-
 fs/unionfs/lookup.c     |  348 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/unionfs/rename.c     |    1 +
 fs/unionfs/union.h      |    4 +
 5 files changed, 366 insertions(+), 8 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 5816d41..df002d5 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -422,7 +422,8 @@ reval_dentry:
 	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
 
 	if (unlikely(sbgen > dgen)) {
-		pr_debug("unionfs: retry dentry revalidation\n");
+		pr_debug("unionfs: retry dentry %s revalidation\n",
+			 dentry->d_name.name);
 		schedule();
 		goto reval_dentry;
 	}
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index bfebc0c..f81ebb6 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -178,6 +178,7 @@ static struct dentry *unionfs_lookup(struct inode *parent,
 {
 	struct path path_save = {NULL, NULL};
 	struct dentry *ret;
+	int err = 0;
 
 	unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
 	if (dentry != dentry->d_parent)
@@ -193,7 +194,14 @@ static struct dentry *unionfs_lookup(struct inode *parent,
 	 * unionfs_lookup_backend returns a locked dentry upon success,
 	 * so we'll have to unlock it below.
 	 */
-	ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
+
+	/* allocate dentry private data.  We free it in ->d_release */
+	err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
+	if (unlikely(err)) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+	ret = unionfs_lookup_full(dentry, nd, INTERPOSE_LOOKUP);
 
 	/* restore the dentry & vfsmnt in namei */
 	if (nd) {
@@ -203,6 +211,11 @@ static struct dentry *unionfs_lookup(struct inode *parent,
 	if (!IS_ERR(ret)) {
 		if (ret)
 			dentry = ret;
+		/* lookup_full can return multiple positive dentries */
+		if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
+			BUG_ON(dbstart(dentry) < 0);
+			unionfs_postcopyup_release(dentry);
+		}
 		unionfs_copy_attr_times(dentry->d_inode);
 		/* parent times may have changed */
 		unionfs_copy_attr_times(dentry->d_parent->d_inode);
@@ -212,9 +225,10 @@ static struct dentry *unionfs_lookup(struct inode *parent,
 	if (!IS_ERR(ret)) {
 		unionfs_check_dentry(dentry);
 		unionfs_check_nd(nd);
-		unionfs_unlock_dentry(dentry);
 	}
+	unionfs_unlock_dentry(dentry);
 
+out:
 	if (dentry != dentry->d_parent) {
 		unionfs_check_dentry(dentry->d_parent);
 		unionfs_unlock_dentry(dentry->d_parent);
diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
index 0f62087..37adf66 100644
--- a/fs/unionfs/lookup.c
+++ b/fs/unionfs/lookup.c
@@ -18,7 +18,49 @@
 
 #include "union.h"
 
-static int realloc_dentry_private_data(struct dentry *dentry);
+/*
+ * Lookup one path component @name relative to a <base,mnt> path pair.
+ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
+ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
+ * other lower mounts properly.  If @new_mnt is non-null, will fill in the
+ * new mnt there.  Caller is responsible to dput/mntput/path_put returned
+ * @dentry and @new_mnt.
+ */
+struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
+			    const char *name, struct vfsmount **new_mnt)
+{
+	struct dentry *dentry = NULL;
+	struct nameidata lower_nd;
+	int err;
+
+	/* we use flags=0 to get basic lookup */
+	err = vfs_path_lookup(base, mnt, name, 0, &lower_nd);
+
+	switch (err) {
+	case 0: /* no error */
+		dentry = lower_nd.path.dentry;
+		if (new_mnt)
+			*new_mnt = lower_nd.path.mnt; /* rc already inc'ed */
+		break;
+	case -ENOENT:
+		 /*
+		  * We don't consider ENOENT an error, and we want to return
+		  * a negative dentry (ala lookup_one_len).  As we know
+		  * there was no inode for this name before (-ENOENT), then
+		  * it's safe to call lookup_one_len (which doesn't take a
+		  * vfsmount).
+		  */
+		dentry = lookup_one_len(name, base, strlen(name));
+		if (new_mnt)
+			*new_mnt = mntget(lower_nd.path.mnt);
+		break;
+	default: /* all other real errors */
+		dentry = ERR_PTR(err);
+		break;
+	}
+
+	return dentry;
+}
 
 /*
  * Main (and complex) driver function for Unionfs's lookup
@@ -32,7 +74,8 @@ static int realloc_dentry_private_data(struct dentry *dentry);
  * dentry's info, which the caller must unlock.
  */
 struct dentry *unionfs_lookup_backend(struct dentry *dentry,
-				      struct nameidata *nd, int lookupmode)
+				      struct nameidata *nd_unused,
+				      int lookupmode)
 {
 	int err = 0;
 	struct dentry *lower_dentry = NULL;
@@ -361,6 +404,7 @@ out:
  * Caller must lock this dentry with unionfs_lock_dentry.
  *
  * Returns: 0 (ok), or -ERRNO if an error occurred.
+ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly
  */
 int unionfs_partial_lookup(struct dentry *dentry)
 {
@@ -368,7 +412,8 @@ int unionfs_partial_lookup(struct dentry *dentry)
 	struct nameidata nd = { .flags = 0 };
 	int err = -ENOSYS;
 
-	tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
+	tmp = unionfs_lookup_full(dentry, &nd, INTERPOSE_PARTIAL);
+
 	if (!tmp) {
 		err = 0;
 		goto out;
@@ -377,7 +422,7 @@ int unionfs_partial_lookup(struct dentry *dentry)
 		err = PTR_ERR(tmp);
 		goto out;
 	}
-	/* need to change the interface */
+	/* XXX: need to change the interface */
 	BUG_ON(tmp != dentry);
 out:
 	return err;
@@ -437,7 +482,7 @@ static inline int __realloc_dentry_private_data(struct dentry *dentry)
 }
 
 /* UNIONFS_D(dentry)->lock must be locked */
-static int realloc_dentry_private_data(struct dentry *dentry)
+int realloc_dentry_private_data(struct dentry *dentry)
 {
 	if (!__realloc_dentry_private_data(dentry))
 		return 0;
@@ -568,3 +613,296 @@ void release_lower_nd(struct nameidata *nd, int err)
 	kfree(nd->intent.open.file);
 #endif /* ALLOC_LOWER_ND_FILE */
 }
+
+/*
+ * Main (and complex) driver function for Unionfs's lookup
+ *
+ * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
+ * PTR if d_splice returned a different dentry.
+ *
+ * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
+ * inode info must be locked.  If lookupmode is INTERPOSE_LOOKUP (i.e., a
+ * newly looked-up dentry), then unionfs_lookup_backend will return a locked
+ * dentry's info, which the caller must unlock.
+ */
+struct dentry *unionfs_lookup_full(struct dentry *dentry,
+				   struct nameidata *nd_unused, int lookupmode)
+{
+	int err = 0;
+	struct dentry *lower_dentry = NULL;
+	struct vfsmount *lower_mnt;
+	struct vfsmount *lower_dir_mnt;
+	struct dentry *wh_lower_dentry = NULL;
+	struct dentry *lower_dir_dentry = NULL;
+	struct dentry *parent_dentry = NULL;
+	struct dentry *d_interposed = NULL;
+	int bindex, bstart, bend, bopaque;
+	int opaque, num_positive = 0;
+	const char *name;
+	int namelen;
+	int pos_start, pos_end;
+
+	/*
+	 * We should already have a lock on this dentry in the case of a
+	 * partial lookup, or a revalidation.  Otherwise it is returned from
+	 * new_dentry_private_data already locked.
+	 */
+	verify_locked(dentry);
+
+	/* must initialize dentry operations */
+	dentry->d_op = &unionfs_dops;
+
+	/* We never partial lookup the root directory. */
+	if (IS_ROOT(dentry))
+		goto out;
+	parent_dentry = dget_parent(dentry);
+
+	name = dentry->d_name.name;
+	namelen = dentry->d_name.len;
+
+	/* No dentries should get created for possible whiteout names. */
+	if (!is_validname(name)) {
+		err = -EPERM;
+		goto out_free;
+	}
+
+	/* Now start the actual lookup procedure. */
+	bstart = dbstart(parent_dentry);
+	bend = dbend(parent_dentry);
+	bopaque = dbopaque(parent_dentry);
+	BUG_ON(bstart < 0);
+
+	/* adjust bend to bopaque if needed */
+	if ((bopaque >= 0) && (bopaque < bend))
+		bend = bopaque;
+
+	/* lookup all possible dentries */
+	for (bindex = bstart; bindex <= bend; bindex++) {
+
+		lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+		lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
+
+		/* skip if we already have a positive lower dentry */
+		if (lower_dentry) {
+			if (dbstart(dentry) < 0)
+				dbstart(dentry) = bindex;
+			if (bindex > dbend(dentry))
+				dbend(dentry) = bindex;
+			if (lower_dentry->d_inode)
+				num_positive++;
+			continue;
+		}
+
+		lower_dir_dentry =
+			unionfs_lower_dentry_idx(parent_dentry, bindex);
+		/* if the lower dentry's parent does not exist, skip this */
+		if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
+			continue;
+
+		/* also skip it if the parent isn't a directory. */
+		if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
+			continue; /* XXX: should be BUG_ON */
+
+		/* check for whiteouts: stop lookup if found */
+		wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
+		if (IS_ERR(wh_lower_dentry)) {
+			err = PTR_ERR(wh_lower_dentry);
+			goto out_free;
+		}
+		if (wh_lower_dentry->d_inode) {
+			dbend(dentry) = dbopaque(dentry) = bindex;
+			if (dbstart(dentry) < 0)
+				dbstart(dentry) = bindex;
+			dput(wh_lower_dentry);
+			break;
+		}
+		dput(wh_lower_dentry);
+
+		/* Now do regular lookup; lookup @name */
+		lower_dir_mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
+		lower_mnt = NULL; /* XXX: needed? */
+
+		lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
+					    name, &lower_mnt);
+
+		if (IS_ERR(lower_dentry)) {
+			err = PTR_ERR(lower_dentry);
+			goto out_free;
+		}
+		unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
+		BUG_ON(!lower_mnt);
+		unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
+
+		/* adjust dbstart/end */
+		if (dbstart(dentry) < 0)
+			dbstart(dentry) = bindex;
+		if (bindex > dbend(dentry))
+			dbend(dentry) = bindex;
+		/*
+		 * We always store the lower dentries above, and update
+		 * dbstart/dbend, even if the whole unionfs dentry is
+		 * negative (i.e., no lower inodes).
+		 */
+		if (!lower_dentry->d_inode)
+			continue;
+		num_positive++;
+
+		/*
+		 * check if we just found an opaque directory, if so, stop
+		 * lookups here.
+		 */
+		if (!S_ISDIR(lower_dentry->d_inode->i_mode))
+			continue;
+		opaque = is_opaque_dir(dentry, bindex);
+		if (opaque < 0) {
+			err = opaque;
+			goto out_free;
+		} else if (opaque) {
+			dbend(dentry) = dbopaque(dentry) = bindex;
+			break;
+		}
+		dbend(dentry) = bindex;
+
+		/* update parent directory's atime with the bindex */
+		fsstack_copy_attr_atime(parent_dentry->d_inode,
+					lower_dir_dentry->d_inode);
+	}
+
+	/* sanity checks, then decide if to process a negative dentry */
+	BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
+	BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
+
+	if (num_positive > 0)
+		goto out_positive;
+
+	/*** handle NEGATIVE dentries ***/
+
+	/*
+	 * If negative, keep only first lower negative dentry, to save on
+	 * memory.
+	 */
+	if (dbstart(dentry) < dbend(dentry)) {
+		path_put_lowers(dentry, dbstart(dentry) + 1,
+				dbend(dentry), false);
+		dbend(dentry) = dbstart(dentry);
+	}
+	if (lookupmode == INTERPOSE_PARTIAL)
+		goto out;
+	if (lookupmode == INTERPOSE_LOOKUP) {
+		/*
+		 * If all we found was a whiteout in the first available
+		 * branch, then create a negative dentry for a possibly new
+		 * file to be created.
+		 */
+		if (dbopaque(dentry) < 0)
+			goto out;
+		/* XXX: need to get mnt here */
+		bindex = dbstart(dentry);
+		if (unionfs_lower_dentry_idx(dentry, bindex))
+			goto out;
+		lower_dir_dentry =
+			unionfs_lower_dentry_idx(parent_dentry, bindex);
+		if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
+			goto out;
+		if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
+			goto out; /* XXX: should be BUG_ON */
+		/* XXX: do we need to cross bind mounts here? */
+		lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen);
+		if (IS_ERR(lower_dentry)) {
+			err = PTR_ERR(lower_dentry);
+			goto out;
+		}
+		/* XXX: need to mntget/mntput as needed too! */
+		unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
+		/* XXX: wrong mnt for crossing bind mounts! */
+		lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
+		unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
+
+		goto out;
+	}
+
+	/* if we're revalidating a positive dentry, don't make it negative */
+	if (lookupmode != INTERPOSE_REVAL)
+		d_add(dentry, NULL);
+
+	goto out;
+
+out_positive:
+	/*** handle POSITIVE dentries ***/
+
+	/*
+	 * This unionfs dentry is positive (at least one lower inode
+	 * exists), so scan entire dentry from beginning to end, and remove
+	 * any negative lower dentries, if any.  Then, update dbstart/dbend
+	 * to reflect the start/end of positive dentries.
+	 */
+	pos_start = pos_end = -1;
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		lower_dentry = unionfs_lower_dentry_idx(dentry,
+							bindex);
+		if (lower_dentry && lower_dentry->d_inode) {
+			if (pos_start < 0)
+				pos_start = bindex;
+			if (bindex > pos_end)
+				pos_end = bindex;
+			continue;
+		}
+		path_put_lowers(dentry, bindex, bindex, false);
+	}
+	if (pos_start >= 0)
+		dbstart(dentry) = pos_start;
+	if (pos_end >= 0)
+		dbend(dentry) = pos_end;
+
+	/* Partial lookups need to re-interpose, or throw away older negs. */
+	if (lookupmode == INTERPOSE_PARTIAL) {
+		if (dentry->d_inode) {
+			unionfs_reinterpose(dentry);
+			goto out;
+		}
+
+		/*
+		 * This dentry was positive, so it is as if we had a
+		 * negative revalidation.
+		 */
+		lookupmode = INTERPOSE_REVAL_NEG;
+		update_bstart(dentry);
+	}
+
+	/*
+	 * Interpose can return a dentry if d_splice returned a different
+	 * dentry.
+	 */
+	d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
+	if (IS_ERR(d_interposed))
+		err = PTR_ERR(d_interposed);
+	else if (d_interposed)
+		dentry = d_interposed;
+
+	if (!err)
+		goto out;
+	d_drop(dentry);
+
+out_free:
+	/* should dput/mntput all the underlying dentries on error condition */
+	if (dbstart(dentry) >= 0)
+		path_put_lowers_all(dentry, false);
+	/* free lower_paths unconditionally */
+	kfree(UNIONFS_D(dentry)->lower_paths);
+	UNIONFS_D(dentry)->lower_paths = NULL;
+
+out:
+	if (dentry && UNIONFS_D(dentry)) {
+		BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
+		BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
+	}
+	if (d_interposed && UNIONFS_D(d_interposed)) {
+		BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
+		BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
+	}
+
+	dput(parent_dentry);
+	if (!err && d_interposed)
+		return d_interposed;
+	return ERR_PTR(err);
+}
diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
index 85f96ee..ecce9da 100644
--- a/fs/unionfs/rename.c
+++ b/fs/unionfs/rename.c
@@ -195,6 +195,7 @@ static int do_unionfs_rename(struct inode *old_dir,
 		struct dentry *unlink_dentry;
 		struct dentry *unlink_dir_dentry;
 
+		BUG_ON(bindex < 0);
 		unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
 		if (!unlink_dentry)
 			continue;
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index 9271b3b..81f34c4 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -294,6 +294,7 @@ static inline void unionfs_double_lock_dentry(struct dentry *d1,
 }
 
 extern int new_dentry_private_data(struct dentry *dentry, int subclass);
+extern int realloc_dentry_private_data(struct dentry *dentry);
 extern void free_dentry_private_data(struct dentry *dentry);
 extern void update_bstart(struct dentry *dentry);
 extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
@@ -309,6 +310,9 @@ extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
 
 /* partial lookup */
 extern int unionfs_partial_lookup(struct dentry *dentry);
+extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
+					  struct nameidata *nd_unused,
+					  int lookupmode);
 
 /* copies a file from dbstart to newbindex branch */
 extern int copyup_file(struct inode *dir, struct file *file, int bstart,
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/