Hi everyone,
The fact that write_super gets called for both syncs and periodic
commits causes problems for the journaled filesystems, since we
need to trigger commits on write_super to have sync() behave
properly.
So, this patch adds a new super operation called commit_super,
and extends struct super.s_dirt a little so the filesystem
can say: call me on sync() but don't call me from kupdate.
if (s_dirt & S_SUPER_DIRTY) call me from kupdate and on sync
if (s_dirt & S_SUPER_DIRTY_COMMIT) call me on sync only.
If the commit_super operation is not provided, fs/super.c goes
back to the old s_dirt usage.
I've got reiserfs bits to make use of this, they increase our
streaming write performance by ~30%. This patch is against 2.4.19-pre1,
it applies cleanly to pre3 but I haven't tested that yet. I'll port
to 2.5 if we agree the patch is a good idea.
-chris
--- linus.25/fs/super.c Mon, 28 Jan 2002 09:51:50 -0500
+++ speedup.1/fs/super.c Sun, 12 May 2002 10:54:29 -0400
@@ -453,15 +453,68 @@
put_super(sb);
}
+/* since we've added the idea of comit_dirty vs regular dirty with
+ * commit_super operation, only use the S_SUPER_DIRTY mask if
+ * the FS has a commit_super op.
+ */
+static inline int super_dirty(struct super_block *sb)
+{
+ if (sb->s_op && sb->s_op->commit_super) {
+ return sb->s_dirt & S_SUPER_DIRTY;
+ }
+ return sb->s_dirt;
+}
+
+
static inline void write_super(struct super_block *sb)
{
lock_super(sb);
- if (sb->s_root && sb->s_dirt)
+ if (sb->s_root && super_dirty(sb))
if (sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
unlock_super(sb);
}
+static inline void commit_super(struct super_block *sb)
+{
+ lock_super(sb);
+ if (sb->s_root && sb->s_dirt) {
+ if (sb->s_op && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ if (sb->s_op && sb->s_op->commit_super)
+ sb->s_op->commit_super(sb);
+ }
+ unlock_super(sb);
+}
+
+void commit_supers(kdev_t dev)
+{
+ struct super_block * sb;
+
+ if (dev) {
+ sb = get_super(dev);
+ if (sb) {
+ if (sb->s_dirt)
+ commit_super(sb);
+ drop_super(sb);
+ }
+ }
+restart:
+ spin_lock(&sb_lock);
+ sb = sb_entry(super_blocks.next);
+ while (sb != sb_entry(&super_blocks))
+ if (sb->s_dirt) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+ commit_super(sb);
+ drop_super(sb);
+ goto restart;
+ } else
+ sb = sb_entry(sb->s_list.next);
+ spin_unlock(&sb_lock);
+}
+
/*
* Note: check the dirty flag before waiting, so we don't
* hold up the sync while mounting a device. (The newly
@@ -484,7 +537,7 @@
spin_lock(&sb_lock);
sb = sb_entry(super_blocks.next);
while (sb != sb_entry(&super_blocks))
- if (sb->s_dirt) {
+ if (super_dirty(sb)) {
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
--- linus.25/fs/buffer.c Mon, 11 Feb 2002 12:21:42 -0500
+++ speedup.1/fs/buffer.c Sun, 12 May 2002 10:54:29 -0400
@@ -327,6 +327,8 @@
lock_super(sb);
if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
+ if (sb->s_op && sb->s_op->commit_super)
+ sb->s_op->commit_super(sb);
unlock_super(sb);
unlock_kernel();
@@ -346,7 +348,7 @@
lock_kernel();
sync_inodes(dev);
DQUOT_SYNC(dev);
- sync_supers(dev);
+ commit_supers(dev);
unlock_kernel();
return sync_buffers(dev, 1);
--- linus.25/include/linux/fs.h Mon, 28 Jan 2002 09:51:50 -0500
+++ speedup.1/include/linux/fs.h Sun, 12 May 2002 10:54:29 -0400
@@ -697,6 +697,10 @@
#define sb_entry(list) list_entry((list), struct super_block, s_list)
#define S_BIAS (1<<30)
+
+/* flags for the s_dirt field */
+#define S_SUPER_DIRTY 1
+#define S_SUPER_DIRTY_COMMIT 2
struct super_block {
struct list_head s_list; /* Keep this first */
kdev_t s_dev;
@@ -909,6 +913,7 @@
struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent);
int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent);
int (*show_options)(struct seq_file *, struct vfsmount *);
+ void (*commit_super) (struct super_block *);
};
/* Inode state bits.. */
@@ -1216,6 +1221,7 @@
extern int filemap_fdatasync(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(kdev_t);
+extern void commit_supers(kdev_t);
extern int bmap(struct inode *, int);
extern int notify_change(struct dentry *, struct iattr *);
extern int permission(struct inode *, int);
Chris Mason wrote:
>
> Hi everyone,
>
> The fact that write_super gets called for both syncs and periodic
> commits causes problems for the journaled filesystems, since we
> need to trigger commits on write_super to have sync() behave
> properly.
>
> So, this patch adds a new super operation called commit_super,
> and extends struct super.s_dirt a little so the filesystem
> can say: call me on sync() but don't call me from kupdate.
>
> if (s_dirt & S_SUPER_DIRTY) call me from kupdate and on sync
> if (s_dirt & S_SUPER_DIRTY_COMMIT) call me on sync only.
>
I'm not quite sure why these flags exist? Would it not be
sufficient to just call ->write_super() inside kupdate,
and ->commit_super in fsync_dev()? (With a ->write_super
fallback, of course).
Either way, this is good - ext3's write_super() can just
become a one-liner: `sb->s_dirt = 0'.
-
On Tuesday, March 12, 2002 02:15:40 PM -0800 Andrew Morton <[email protected]> wrote:
> Chris Mason wrote:
>>
>> Hi everyone,
>>
>> The fact that write_super gets called for both syncs and periodic
>> commits causes problems for the journaled filesystems, since we
>> need to trigger commits on write_super to have sync() behave
>> properly.
>>
>> So, this patch adds a new super operation called commit_super,
>> and extends struct super.s_dirt a little so the filesystem
>> can say: call me on sync() but don't call me from kupdate.
>>
>> if (s_dirt & S_SUPER_DIRTY) call me from kupdate and on sync
>> if (s_dirt & S_SUPER_DIRTY_COMMIT) call me on sync only.
>>
>
> I'm not quite sure why these flags exist? Would it not be
> sufficient to just call ->write_super() inside kupdate,
> and ->commit_super in fsync_dev()? (With a ->write_super
> fallback, of course).
fsync_dev(dev != 0) is easy, you can ignore the dirty flag
and call commit_super on the proper device.
But, the loop in sync_supers(dev == 0) is harder, it expects
some flag it can check, and it expects the callback to the FS
will clear that flag. Adding a new flag seemed like more fun
than redoing the locking and super walk. I'm curious to hear what
Al thinks of it though.
-chris
On Tuesday, March 12, 2002 05:48:47 PM -0500 Chris Mason <[email protected]> wrote:
>>> if (s_dirt & S_SUPER_DIRTY) call me from kupdate and on sync
>>> if (s_dirt & S_SUPER_DIRTY_COMMIT) call me on sync only.
>>>
>>
>> I'm not quite sure why these flags exist? Would it not be
>> sufficient to just call ->write_super() inside kupdate,
>> and ->commit_super in fsync_dev()? (With a ->write_super
>> fallback, of course).
>
> fsync_dev(dev != 0) is easy, you can ignore the dirty flag
> and call commit_super on the proper device.
>
> But, the loop in sync_supers(dev == 0) is harder, it expects
> some flag it can check, and it expects the callback to the FS
> will clear that flag. Adding a new flag seemed like more fun
> than redoing the locking and super walk. I'm curious to hear what
> Al thinks of it though.
Of course, that's slightly more likely if I actually cc the right
address.
-chris
On Tue, 12 Mar 2002, Chris Mason wrote:
>
> But, the loop in sync_supers(dev == 0) is harder, it expects
> some flag it can check, and it expects the callback to the FS
> will clear that flag. Adding a new flag seemed like more fun
> than redoing the locking and super walk. I'm curious to hear what
> Al thinks of it though.
Sorry if this is irrelevant to your case (wasn't following the thread),
but we noticed that repeatedly-restarting sync_supers() loop some while
back. Would this patch help?
Hugh
--- 2.4.19-pre3/fs/super.c Tue Mar 12 02:16:52 2002
+++ linux/fs/super.c Wed Mar 13 20:27:54 2002
@@ -398,6 +398,7 @@
struct file_system_type *fs = s->s_type;
spin_lock(&sb_lock);
+ s->s_type = NULL;
list_del(&s->s_list);
list_del(&s->s_instances);
spin_unlock(&sb_lock);
@@ -461,19 +462,25 @@
}
return;
}
-restart:
spin_lock(&sb_lock);
+restart:
sb = sb_entry(super_blocks.next);
- while (sb != sb_entry(&super_blocks))
+ while (sb != sb_entry(&super_blocks)) {
if (sb->s_dirt) {
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
write_super(sb);
- drop_super(sb);
- goto restart;
- } else
- sb = sb_entry(sb->s_list.next);
+ up_read(&sb->s_umount);
+ spin_lock(&sb_lock);
+ if (!--sb->s_count) {
+ destroy_super(sb);
+ goto restart;
+ } else if (!sb->s_type)
+ goto restart;
+ }
+ sb = sb_entry(sb->s_list.next);
+ }
spin_unlock(&sb_lock);
}