The following is a series of patchsets related to Unionfs. This is the
third set of patchsets resulting from an lkml review of the entire unionfs
code base. The most significant change here is a locking/race bugfix during
dentry revalidation.
These patches were tested (where appropriate) on Linus's 2.6.24 latest code
(as of v2.6.24-rc6-179-gb8c9a18), MM, as well as the backports to
2.6.{23,22,21,20,19,18,9} on ext2/3/4, xfs, reiserfs, nfs2/3/4, jffs2,
ramfs, tmpfs, cramfs, and squashfs (where available). Also tested with
LTP-full. See http://unionfs.filesystems.org/ to download back-ported
unionfs code.
Please pull from the 'master' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
to receive the following:
Erez Zadok (3):
Unionfs: use printk KERN_CONT for debugging messages
Unionfs: locking fixes
Unionfs: use VFS helpers to manipulate i_nlink
debug.c | 50 ++++++++++++++++++++++++++------------------------
dentry.c | 13 ++++++++++++-
fanout.h | 3 ++-
unlink.c | 2 +-
4 files changed, 41 insertions(+), 27 deletions(-)
---
Erez Zadok
[email protected]
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/debug.c | 50 ++++++++++++++++++++++++++------------------------
1 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
index c2b8b58..5f1d887 100644
--- a/fs/unionfs/debug.c
+++ b/fs/unionfs/debug.c
@@ -456,9 +456,10 @@ void __show_branch_counts(const struct super_block *sb,
mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
else
mnt = NULL;
- pr_debug("%d:", (mnt ? atomic_read(&mnt->mnt_count) : -99));
+ printk(KERN_CONT "%d:",
+ (mnt ? atomic_read(&mnt->mnt_count) : -99));
}
- pr_debug("%s:%s:%d\n", file, fxn, line);
+ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
}
void __show_inode_times(const struct inode *inode,
@@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
if (unlikely(!lower_inode))
continue;
pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
- pr_debug("%s:%s:%d ", file, fxn, line);
- pr_debug("um=%lu/%lu lm=%lu/%lu ",
- inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- lower_inode->i_mtime.tv_sec,
- lower_inode->i_mtime.tv_nsec);
- pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- lower_inode->i_ctime.tv_sec,
- lower_inode->i_ctime.tv_nsec);
+ printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+ printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ lower_inode->i_mtime.tv_sec,
+ lower_inode->i_mtime.tv_nsec);
+ printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ lower_inode->i_ctime.tv_sec,
+ lower_inode->i_ctime.tv_nsec);
}
}
@@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
continue;
pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
bindex);
- pr_debug("%s:%s:%d ", file, fxn, line);
- pr_debug("um=%lu/%lu lm=%lu/%lu ",
- inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- lower_inode->i_mtime.tv_sec,
- lower_inode->i_mtime.tv_nsec);
- pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- lower_inode->i_ctime.tv_sec,
- lower_inode->i_ctime.tv_nsec);
+ printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+ printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ lower_inode->i_mtime.tv_sec,
+ lower_inode->i_mtime.tv_nsec);
+ printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ lower_inode->i_ctime.tv_sec,
+ lower_inode->i_ctime.tv_nsec);
}
}
@@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (unlikely(!lower_inode))
continue;
- pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
- atomic_read(&(inode)->i_count));
- pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
- pr_debug("%s:%s:%d\n", file, fxn, line);
+ printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
+ atomic_read(&(inode)->i_count));
+ printk(KERN_CONT "lc=%d ",
+ atomic_read(&(lower_inode)->i_count));
+ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
}
}
--
1.5.2.2
Lock parent dentries during revalidation.
Reduce total number of lockdep classes used.
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/dentry.c | 13 ++++++++++++-
fs/unionfs/fanout.h | 3 ++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 0369d93..7646828 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -42,6 +42,7 @@ static bool __unionfs_d_revalidate_one(struct dentry *dentry,
memset(&lowernd, 0, sizeof(struct nameidata));
verify_locked(dentry);
+ verify_locked(dentry->d_parent);
/* if the dentry is unhashed, do NOT revalidate */
if (d_deleted(dentry))
@@ -351,7 +352,10 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
* to child order.
*/
for (i = 0; i < chain_len; i++) {
- unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL+i);
+ unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
+ if (chain[i] != chain[i]->d_parent)
+ unionfs_lock_dentry(chain[i]->d_parent,
+ UNIONFS_DMUTEX_REVAL_PARENT);
saved_bstart = dbstart(chain[i]);
saved_bend = dbend(chain[i]);
sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
@@ -366,6 +370,8 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
bindex++)
unionfs_mntput(chain[i], bindex);
}
+ if (chain[i] != chain[i]->d_parent)
+ unionfs_unlock_dentry(chain[i]->d_parent);
unionfs_unlock_dentry(chain[i]);
if (unlikely(!valid))
@@ -376,6 +382,9 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
out_this:
/* finally, lock this dentry and revalidate it */
verify_locked(dentry);
+ if (dentry != dentry->d_parent)
+ unionfs_lock_dentry(dentry->d_parent,
+ UNIONFS_DMUTEX_REVAL_PARENT);
dgen = atomic_read(&UNIONFS_D(dentry)->generation);
if (unlikely(is_newer_lower(dentry))) {
@@ -394,6 +403,8 @@ out_this:
purge_inode_data(dentry->d_inode);
}
valid = __unionfs_d_revalidate_one(dentry, nd);
+ if (dentry != dentry->d_parent)
+ unionfs_unlock_dentry(dentry->d_parent);
/*
* If __unionfs_d_revalidate_one() succeeded above, then it will
diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
index 5f31015..4d9a45f 100644
--- a/fs/unionfs/fanout.h
+++ b/fs/unionfs/fanout.h
@@ -290,7 +290,8 @@ enum unionfs_dentry_lock_class {
UNIONFS_DMUTEX_PARENT,
UNIONFS_DMUTEX_CHILD,
UNIONFS_DMUTEX_WHITEOUT,
- UNIONFS_DMUTEX_REVAL, /* for file/dentry revalidate */
+ UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
+ UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */
};
static inline void unionfs_lock_dentry(struct dentry *d,
--
1.5.2.2
Signed-off-by: Erez Zadok <[email protected]>
---
fs/unionfs/unlink.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
index a1c82b6..1e370a1 100644
--- a/fs/unionfs/unlink.c
+++ b/fs/unionfs/unlink.c
@@ -79,7 +79,7 @@ static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
out:
if (!err)
- dentry->d_inode->i_nlink--;
+ inode_dec_link_count(dentry->d_inode);
/* We don't want to leave negative leftover dentries for revalidate. */
if (!err && (dbopaque(dentry) != -1))
--
1.5.2.2
On Thu, 2008-01-03 at 00:57 -0500, Erez Zadok wrote:
> diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
> index c2b8b58..5f1d887 100644
> --- a/fs/unionfs/debug.c
> +++ b/fs/unionfs/debug.c
> void __show_inode_times(const struct inode *inode,
> @@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
> if (unlikely(!lower_inode))
> continue;
> pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> - lower_inode->i_mtime.tv_sec,
> - lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> - lower_inode->i_ctime.tv_sec,
> - lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> + lower_inode->i_mtime.tv_sec,
> + lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> + lower_inode->i_ctime.tv_sec,
> + lower_inode->i_ctime.tv_nsec);
> }
> }
>
I think printks should be single statements and
KERN_CONT should be used as sparingly as possible.
Perhaps:
pr_debug("IT(%lu:%d): %s:%s:%d "
"um=%lu/%lu lm=%lu/%lu "
"uc=%lu/%lu lc=%lu/%lu\n",
inode->i_ino, bindex, file, fnx, line,
inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
lower_inode->i_mtime.tv_sec,
lower_inode->i_mtime.tv_nsec
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
lower_inode->i_ctime.tv_sec,
lower_inode->i_ctime.tv_nsec);
> @@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
> continue;
> pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
> bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> - lower_inode->i_mtime.tv_sec,
> - lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> - lower_inode->i_ctime.tv_sec,
> - lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> + lower_inode->i_mtime.tv_sec,
> + lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> + lower_inode->i_ctime.tv_sec,
> + lower_inode->i_ctime.tv_nsec);
> }
> }
>
and
pr_debug("DT(%s:%lu:%d): %s:%s:%d "
"um=%lu/%lu lm=%lu/%lu "
"uc=%lu/%lu lc=%lu/%lu\n",
dentry->d_name.name, inode->i_ino, bindex,
file, fnx, line,
inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
lower_inode->i_mtime.tv_sec,
lower_inode->i_mtime.tv_nsec
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
lower_inode->i_ctime.tv_sec,
lower_inode->i_ctime.tv_nsec);
>
> @@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
> lower_inode = unionfs_lower_inode_idx(inode, bindex);
> if (unlikely(!lower_inode))
> continue;
> - pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> - atomic_read(&(inode)->i_count));
> - pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
> - pr_debug("%s:%s:%d\n", file, fxn, line);
> + printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> + atomic_read(&(inode)->i_count));
> + printk(KERN_CONT "lc=%d ",
> + atomic_read(&(lower_inode)->i_count));
> + printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
> }
> }
and
pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
inode->i_ino, bindex,
atomic_read(&(inode)->i_count),
atomic_read(&(lower_inode)->i_count),
file, fxn, line);
In message <1199341581.6615.39.camel@localhost>, Joe Perches writes:
> On Thu, 2008-01-03 at 00:57 -0500, Erez Zadok wrote:
> I think printks should be single statements and
> KERN_CONT should be used as sparingly as possible.
[...]
KERN_CONT is documented as not being SMP safe, but I figured it was harmless
for just some debugging message. Still, I like your way better. Thanks
Joe.
> Perhaps:
> pr_debug("IT(%lu:%d): %s:%s:%d "
> "um=%lu/%lu lm=%lu/%lu "
> "uc=%lu/%lu lc=%lu/%lu\n",
> inode->i_ino, bindex, file, fnx, line,
> inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> lower_inode->i_mtime.tv_sec,
> lower_inode->i_mtime.tv_nsec
> inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> lower_inode->i_ctime.tv_sec,
> lower_inode->i_ctime.tv_nsec);
[...]
Erez.