The following is a series of patchsets related to Unionfs. This is the
fourth set of patchsets resulting from an lkml review of the entire unionfs
code base, in preparation for a merge into mainline. The most significant
changes here are a few locking/race bugfix related to branch-management.
These patches were tested (where appropriate) on Linus's 2.6.24 latest code
(as of v2.6.24-rc7-71-gfd0b45d), MM, as well as the backports to
2.6.{23,22,21,20,19,18,9} on ext2/3/4, xfs, reiserfs, nfs2/3/4, jffs2,
ramfs, tmpfs, cramfs, and squashfs (where available). Also tested with
LTP-full and with a continuous parallel kernel compile (while forcing cache
flushing, manipulating lower branches, etc.). See
http://unionfs.filesystems.org/ to download back-ported unionfs code.
Please pull from the 'master' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
to receive the following:
Erez Zadok (4):
Unionfs: merged several printk KERN_CONT together into one pr_debug
Unionfs: mmap fixes
Unionfs: branch-management related locking fixes
Unionfs: ensure we have lower dentries in d_iput
commonfops.c | 6 ++++++
debug.c | 51 +++++++++++++++++++++++++--------------------------
dentry.c | 9 +++++++--
inode.c | 17 +++++++++++++++++
mmap.c | 26 +++++++++++++++++++++-----
5 files changed, 76 insertions(+), 33 deletions(-)
---
Erez Zadok
[email protected]
Add necessary locking to dentry/inode branch-configuration, so we get
consistent values during branch-management actions. In d_revalidate_chain,
->permission, and ->create, also lock parent dentry.
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/commonfops.c | 6 ++++++
fs/unionfs/dentry.c | 6 +++++-
fs/unionfs/inode.c | 17 +++++++++++++++++
3 files changed, 28 insertions(+), 1 deletions(-)
diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 2c32ada..f37192f 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -318,6 +318,7 @@ int unionfs_file_revalidate(struct file *file, bool willwrite)
* First revalidate the dentry inside struct file,
* but not unhashed dentries.
*/
+reval_dentry:
if (unlikely(!d_deleted(dentry) &&
!__unionfs_d_revalidate_chain(dentry, NULL, willwrite))) {
err = -ESTALE;
@@ -328,6 +329,11 @@ int unionfs_file_revalidate(struct file *file, bool willwrite)
dgen = atomic_read(&UNIONFS_D(dentry)->generation);
fgen = atomic_read(&UNIONFS_F(file)->generation);
+ if (unlikely(sbgen > dgen)) {
+ pr_debug("unionfs: retry dentry revalidation\n");
+ schedule();
+ goto reval_dentry;
+ }
BUG_ON(sbgen > dgen);
/*
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 7646828..d969640 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -203,7 +203,7 @@ bool is_newer_lower(const struct dentry *dentry)
if (!dentry || !UNIONFS_D(dentry))
return false;
inode = dentry->d_inode;
- if (!inode || !UNIONFS_I(inode) ||
+ if (!inode || !UNIONFS_I(inode)->lower_inodes ||
ibstart(inode) < 0 || ibend(inode) < 0)
return false;
@@ -295,6 +295,8 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
chain_len = 0;
sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
dtmp = dentry->d_parent;
+ if (dentry != dtmp)
+ unionfs_lock_dentry(dtmp, UNIONFS_DMUTEX_REVAL_PARENT);
dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
/* XXX: should we check if is_newer_lower all the way up? */
if (unlikely(is_newer_lower(dtmp))) {
@@ -315,6 +317,8 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
}
purge_inode_data(dtmp->d_inode);
}
+ if (dentry != dtmp)
+ unionfs_unlock_dentry(dtmp);
while (sbgen != dgen) {
/* The root entry should always be valid */
BUG_ON(IS_ROOT(dtmp));
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index 6095c4f..e15ddb9 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -30,6 +30,13 @@ static int unionfs_create(struct inode *parent, struct dentry *dentry,
struct nameidata lower_nd;
unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
+ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
+ valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, false);
+ unionfs_unlock_dentry(dentry->d_parent);
+ if (unlikely(!valid)) {
+ err = -ESTALE; /* same as what real_lookup does */
+ goto out;
+ }
unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
valid = __unionfs_d_revalidate_chain(dentry, nd, false);
@@ -936,6 +943,14 @@ static int unionfs_permission(struct inode *inode, int mask,
const int is_file = !S_ISDIR(inode->i_mode);
const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
+ if (nd)
+ unionfs_lock_dentry(nd->dentry, UNIONFS_DMUTEX_CHILD);
+
+ if (!UNIONFS_I(inode)->lower_inodes) {
+ if (is_file) /* dirs can be unlinked but chdir'ed to */
+ err = -ESTALE; /* force revalidate */
+ goto out;
+ }
bstart = ibstart(inode);
bend = ibend(inode);
if (unlikely(bstart < 0 || bend < 0)) {
@@ -1003,6 +1018,8 @@ static int unionfs_permission(struct inode *inode, int mask,
out:
unionfs_check_inode(inode);
unionfs_check_nd(nd);
+ if (nd)
+ unionfs_unlock_dentry(nd->dentry);
return err;
}
--
1.5.2.2
Ensure we have lower inodes in prepare/commit_write.
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/mmap.c | 26 +++++++++++++++++++++-----
1 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index a0e654b..ad770ac 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -224,13 +224,26 @@ out:
static int unionfs_prepare_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
+ int err;
+
+ unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
/*
- * Just copy lower inode attributes and return success. Not much
- * else to do here. No need to lock either (lockdep won't like it).
- * Let commit_write do all the hard work instead.
+ * This is the only place where we unconditionally copy the lower
+ * attribute times before calling unionfs_file_revalidate. The
+ * reason is that our ->write calls do_sync_write which in turn will
+ * call our ->prepare_write and then ->commit_write. Before our
+ * ->write is called, the lower mtimes are in sync, but by the time
+ * the VFS calls our ->commit_write, the lower mtimes have changed.
+ * Therefore, the only reasonable time for us to sync up from the
+ * changed lower mtimes, and avoid an invariant violation warning,
+ * is here, in ->prepare_write.
*/
unionfs_copy_attr_times(file->f_path.dentry->d_inode);
- return 0;
+ err = unionfs_file_revalidate(file, true);
+ unionfs_check_file(file);
+ unionfs_read_unlock(file->f_path.dentry->d_sb);
+
+ return err;
}
static int unionfs_commit_write(struct file *file, struct page *page,
@@ -252,7 +265,6 @@ static int unionfs_commit_write(struct file *file, struct page *page,
unionfs_check_file(file);
inode = page->mapping->host;
- lower_inode = unionfs_lower_inode(inode);
if (UNIONFS_F(file) != NULL)
lower_file = unionfs_lower_file(file);
@@ -282,6 +294,10 @@ static int unionfs_commit_write(struct file *file, struct page *page,
goto out;
/* if vfs_write succeeded above, sync up our times/sizes */
+ lower_inode = lower_file->f_path.dentry->d_inode;
+ if (!lower_inode)
+ lower_inode = unionfs_lower_inode(inode);
+ BUG_ON(!lower_inode);
fsstack_copy_inode_size(inode, lower_inode);
unionfs_copy_attr_times(inode);
mark_inode_dirty_sync(inode);
--
1.5.2.2
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/dentry.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index d969640..cd15243 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -507,9 +507,10 @@ static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
{
int bindex, rc;
+ BUG_ON(!dentry);
unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
- if (dbstart(dentry) < 0)
+ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
goto drop_lower_inodes;
for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
if (unionfs_lower_mnt_idx(dentry, bindex)) {
--
1.5.2.2
CC: Joe Perches <[email protected]>
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/debug.c | 51 +++++++++++++++++++++++++--------------------------
1 files changed, 25 insertions(+), 26 deletions(-)
diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
index 5f1d887..d154c32 100644
--- a/fs/unionfs/debug.c
+++ b/fs/unionfs/debug.c
@@ -472,16 +472,16 @@ void __show_inode_times(const struct inode *inode,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (unlikely(!lower_inode))
continue;
- pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
- printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
- printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
- inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- lower_inode->i_mtime.tv_sec,
- lower_inode->i_mtime.tv_nsec);
- printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- lower_inode->i_ctime.tv_sec,
- lower_inode->i_ctime.tv_nsec);
+ pr_debug("IT(%lu:%d): %s:%s:%d "
+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
+ inode->i_ino, bindex,
+ file, fxn, line,
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ lower_inode->i_mtime.tv_sec,
+ lower_inode->i_mtime.tv_nsec,
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ lower_inode->i_ctime.tv_sec,
+ lower_inode->i_ctime.tv_nsec);
}
}
@@ -496,17 +496,16 @@ void __show_dinode_times(const struct dentry *dentry,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (!lower_inode)
continue;
- pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
- bindex);
- printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
- printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
- inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- lower_inode->i_mtime.tv_sec,
- lower_inode->i_mtime.tv_nsec);
- printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- lower_inode->i_ctime.tv_sec,
- lower_inode->i_ctime.tv_nsec);
+ pr_debug("DT(%s:%lu:%d): %s:%s:%d "
+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
+ dentry->d_name.name, inode->i_ino, bindex,
+ file, fxn, line,
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ lower_inode->i_mtime.tv_sec,
+ lower_inode->i_mtime.tv_nsec,
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ lower_inode->i_ctime.tv_sec,
+ lower_inode->i_ctime.tv_nsec);
}
}
@@ -525,10 +524,10 @@ void __show_inode_counts(const struct inode *inode,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (unlikely(!lower_inode))
continue;
- printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
- atomic_read(&(inode)->i_count));
- printk(KERN_CONT "lc=%d ",
- atomic_read(&(lower_inode)->i_count));
- printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
+ pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
+ inode->i_ino, bindex,
+ atomic_read(&(inode)->i_count),
+ atomic_read(&(lower_inode)->i_count),
+ file, fxn, line);
}
}
--
1.5.2.2