From: Erez Zadok <ezk@cs.sunysb.edu>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
       viro@ftp.linux.org.uk, hch@infradead.org,
       Erez Zadok <ezk@cs.sunysb.edu>
Subject: [PATCH 19/30] Unionfs: prevent false lockdep warnings in stacking
Date: Fri, 28 Dec 2007 15:42:53 -0500
Message-Id: <1198874594640-git-send-email-ezk@cs.sunysb.edu>
In-Reply-To: <11988745841003-git-send-email-ezk@cs.sunysb.edu>
References: <11988745841003-git-send-email-ezk@cs.sunysb.edu>
Sender: linux-kernel-owner@vger.kernel.org
Content-Length: 9334
Lines: 246

A stackable file system like unionfs often performs an operation on a lower
file system, by calling a vfs_* method, having been called possibly by the
very same method from the VFS.  Both calls to the vfs_* method grab a lock
in the same lock class, and hence lockdep complains.  This warning is a
false positive in instances where unionfs only calls the vfs_* method on
lower objects; there's a strict lock ordering here: upper objects first,
then lower objects.

We want to prevent these false positives so that lockdep does not shut down
and remains able to warn us about potentially true locking problems.
So, we temporarily turn off lockdep ONLY AROUND the calls to vfs methods to
which we pass lower objects, and only for those instances where lockdep
complained.  While this solution may seem unclean, it is not without
precedent: other places in the kernel also do similar temporary disabling,
of course after carefully having checked that it is the right thing to do.

In the long run, lockdep needs to be taught how to handle stacking.
Then this patch can be removed.  It is likely that such lockdep-stacking
support will do essentially the same as this patch: consider the same
ordering (upper then lower) and consider upper vs. lower locks to be in
different classes.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 Documentation/filesystems/unionfs/issues.txt |   10 +++++++---
 fs/unionfs/copyup.c                          |    3 +++
 fs/unionfs/inode.c                           |   21 +++++++++++++++++++--
 fs/unionfs/rename.c                          |   21 +++++++++++----------
 fs/unionfs/super.c                           |    4 ++++
 fs/unionfs/unlink.c                          |   12 ++++++++++--
 6 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
index bb6ab05..f4b7e7e 100644
--- a/Documentation/filesystems/unionfs/issues.txt
+++ b/Documentation/filesystems/unionfs/issues.txt
@@ -17,8 +17,12 @@ KNOWN Unionfs 2.x ISSUES:
    an upper object, and then a lower object, in a strict order to avoid
    locking problems; in addition, Unionfs, as a fan-out file system, may
    have to lock several lower inodes.  We are currently looking into Lockdep
-   to see how to make it aware of stackable file systems.  In the meantime,
-   if you get any warnings from Lockdep, you can safely ignore them (or feel
-   free to report them to the Unionfs maintainers, just to be sure).
+   to see how to make it aware of stackable file systems.  For now, we
+   temporarily disable lockdep when calling vfs methods on lower objects,
+   but only for those places where lockdep complained.  While this solution
+   may seem unclean, it is not without precedent: other places in the kernel
+   also do similar temporary disabling, of course after carefully having
+   checked that it is the right thing to do.  Anyway, if you get any warnings
+   from Lockdep, please report them to the Unionfs maintainers.
 
 For more information, see <http://unionfs.filesystems.org/>.
diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index f48209f..0012caf 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -297,11 +297,14 @@ static int __copyup_reg_data(struct dentry *dentry,
 			break;
 		}
 
+		/* see Documentation/filesystems/unionfs/issues.txt */
+		lockdep_off();
 		write_bytes =
 			output_file->f_op->write(output_file,
 						 (char __user *)buf,
 						 read_bytes,
 						 &output_file->f_pos);
+		lockdep_on();
 		if ((write_bytes < 0) || (write_bytes < read_bytes)) {
 			err = write_bytes;
 			break;
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index 3df9b19..4890f42 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -80,7 +80,10 @@ static int unionfs_create(struct inode *parent, struct dentry *dentry,
 			struct dentry *lower_dir_dentry;
 
 			lower_dir_dentry = lock_parent(wh_dentry);
+			/* see Documentation/filesystems/unionfs/issues.txt */
+			lockdep_off();
 			err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
+			lockdep_on();
 			unlock_dir(lower_dir_dentry);
 
 			/*
@@ -262,9 +265,13 @@ static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
 		/* found a .wh.foo entry, unlink it and then call vfs_link() */
 		lower_dir_dentry = lock_parent(whiteout_dentry);
 		err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
-		if (!err)
+		if (!err) {
+			/* see Documentation/filesystems/unionfs/issues.txt */
+			lockdep_off();
 			err = vfs_unlink(lower_dir_dentry->d_inode,
 					 whiteout_dentry);
+			lockdep_on();
+		}
 
 		fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
 		dir->i_nlink = unionfs_get_nlinks(dir);
@@ -291,9 +298,13 @@ static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
 	BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
 	lower_dir_dentry = lock_parent(lower_new_dentry);
 	err = is_robranch(old_dentry);
-	if (!err)
+	if (!err) {
+		/* see Documentation/filesystems/unionfs/issues.txt */
+		lockdep_off();
 		err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
 			       lower_new_dentry);
+		lockdep_on();
+	}
 	unlock_dir(lower_dir_dentry);
 
 docopyup:
@@ -316,10 +327,16 @@ docopyup:
 					unionfs_lower_dentry(old_dentry);
 				lower_dir_dentry =
 					lock_parent(lower_new_dentry);
+				/*
+				 * see
+				 * Documentation/filesystems/unionfs/issues.txt
+				 */
+				lockdep_off();
 				/* do vfs_link */
 				err = vfs_link(lower_old_dentry,
 					       lower_dir_dentry->d_inode,
 					       lower_new_dentry);
+				lockdep_on();
 				unlock_dir(lower_dir_dentry);
 				goto check_link;
 			}
diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
index 452d1e7..8b04acf 100644
--- a/fs/unionfs/rename.c
+++ b/fs/unionfs/rename.c
@@ -90,16 +90,14 @@ static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		dput(lower_wh_dentry);
 	}
 
+	err = is_robranch_super(old_dentry->d_sb, bindex);
+	if (err)
+		goto out;
+
 	dget(lower_old_dentry);
 	lower_old_dir_dentry = dget_parent(lower_old_dentry);
 	lower_new_dir_dentry = dget_parent(lower_new_dentry);
 
-	lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
-
-	err = is_robranch_super(old_dentry->d_sb, bindex);
-	if (err)
-		goto out_unlock;
-
 	/*
 	 * ready to whiteout for old_dentry. caller will create the actual
 	 * whiteout, and must dput(*wh_old)
@@ -110,7 +108,7 @@ static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				      old_dentry->d_name.len);
 		err = PTR_ERR(whname);
 		if (unlikely(IS_ERR(whname)))
-			goto out_unlock;
+			goto out_dput;
 		*wh_old = lookup_one_len(whname, lower_old_dir_dentry,
 					 old_dentry->d_name.len +
 					 UNIONFS_WHLEN);
@@ -118,16 +116,19 @@ static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		err = PTR_ERR(*wh_old);
 		if (IS_ERR(*wh_old)) {
 			*wh_old = NULL;
-			goto out_unlock;
+			goto out_dput;
 		}
 	}
 
+	/* see Documentation/filesystems/unionfs/issues.txt */
+	lockdep_off();
+	lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
 			 lower_new_dir_dentry->d_inode, lower_new_dentry);
-
-out_unlock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	lockdep_on();
 
+out_dput:
 	dput(lower_old_dir_dentry);
 	dput(lower_new_dir_dentry);
 	dput(lower_old_dentry);
diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index 8b70aca..45bcf89 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -839,7 +839,11 @@ static void unionfs_clear_inode(struct inode *inode)
 			lower_inode = unionfs_lower_inode_idx(inode, bindex);
 			if (!lower_inode)
 				continue;
+			unionfs_set_lower_inode_idx(inode, bindex, NULL);
+			/* see Documentation/filesystems/unionfs/issues.txt */
+			lockdep_off();
 			iput(lower_inode);
+			lockdep_on();
 		}
 	}
 
diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
index 423ff36..677a5ae 100644
--- a/fs/unionfs/unlink.c
+++ b/fs/unionfs/unlink.c
@@ -41,8 +41,12 @@ static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
 	/* avoid destroying the lower inode if the file is in use */
 	dget(lower_dentry);
 	err = is_robranch_super(dentry->d_sb, bindex);
-	if (!err)
+	if (!err) {
+		/* see Documentation/filesystems/unionfs/issues.txt */
+		lockdep_off();
 		err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+		lockdep_on();
+	}
 	/* if vfs_unlink succeeded, update our inode's times */
 	if (!err)
 		unionfs_copy_attr_times(dentry->d_inode);
@@ -139,8 +143,12 @@ static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
 	/* avoid destroying the lower inode if the file is in use */
 	dget(lower_dentry);
 	err = is_robranch(dentry);
-	if (!err)
+	if (!err) {
+		/* see Documentation/filesystems/unionfs/issues.txt */
+		lockdep_off();
 		err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+		lockdep_on();
+	}
 	dput(lower_dentry);
 
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/