2009-01-14 15:12:30

by Jan Kara

[permalink] [raw]
Subject: [PATCH 1/2] ext2: Update also inode on disk when dir is IS_DIRSYNC

We used to just write changed page for IS_DIRSYNC inodes. But we also have to
update directory inode itself just for the case that we've allocated a new
block and changed i_size.

Thanks to AKPM for spotting a serious bug in the original version of this
patch.

Signed-off-by: Jan Kara <[email protected]>
CC: Pavel Machek <[email protected]>
---
fs/ext2/dir.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 9a0fc40..2999d72 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -95,10 +95,13 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
mark_inode_dirty(dir);
}

- if (IS_DIRSYNC(dir))
+ if (IS_DIRSYNC(dir)) {
err = write_one_page(page, 1);
- else
+ if (!err)
+ err = ext2_sync_inode(dir);
+ } else {
unlock_page(page);
+ }

return err;
}
--
1.6.0.2



2009-01-14 15:12:30

by Jan Kara

[permalink] [raw]
Subject: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

To be really safe that the data hit the platter, we should also flush drive's
writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.

Signed-off-by: Jan Kara <[email protected]>
CC: [email protected]
---
fs/ext2/dir.c | 5 ++++-
fs/ext2/fsync.c | 7 +++++--
fs/ext2/inode.c | 7 +++++++
fs/ext2/xattr.c | 6 +++++-
4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2999d72..d6cb59f 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -25,6 +25,7 @@
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/blkdev.h> /* for blkdev_issue_flush() */

typedef struct ext2_dir_entry_2 ext2_dirent;

@@ -97,8 +98,10 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)

if (IS_DIRSYNC(dir)) {
err = write_one_page(page, 1);
- if (!err)
+ if (!err) {
err = ext2_sync_inode(dir);
+ blkdev_issue_flush(dir->i_sb->s_bdev, NULL);
+ }
} else {
unlock_page(page);
}
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
index fc66c93..9cd1838 100644
--- a/fs/ext2/fsync.c
+++ b/fs/ext2/fsync.c
@@ -24,6 +24,7 @@

#include "ext2.h"
#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
+#include <linux/blkdev.h> /* for blkdev_issue_flush() */


/*
@@ -39,12 +40,14 @@ int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)

ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY))
- return ret;
+ goto out;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return ret;
+ goto out;

err = ext2_sync_inode(inode);
if (ret == 0)
ret = err;
+out:
+ blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret;
}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 23fff2f..49b479e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <linux/blkdev.h> /* for blkdev_issue_flush() */
#include "ext2.h"
#include "acl.h"
#include "xip.h"
@@ -68,9 +69,14 @@ void ext2_delete_inode (struct inode * inode)
mark_inode_dirty(inode);
ext2_update_inode(inode, inode_needs_sync(inode));

+ /* Make sure inode deletion really gets to disk. Disk write caches
+ * are flushed either in ext2_truncate() or we do it explicitly */
inode->i_size = 0;
if (inode->i_blocks)
ext2_truncate (inode);
+ else if (inode_needs_sync(inode))
+ blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+
ext2_free_inode (inode);

return;
@@ -1104,6 +1110,7 @@ do_indirects:
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
ext2_sync_inode (inode);
+ blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
} else {
mark_inode_dirty(inode);
}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 987a526..d480216 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
+#include <linux/blkdev.h> /* for blkdev_issue_flush() */
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
@@ -702,6 +703,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
DQUOT_FREE_BLOCK(inode, 1);
goto cleanup;
}
+ blkdev_issue_flush(sb->s_bdev, NULL);
} else
mark_inode_dirty(inode);

@@ -792,8 +794,10 @@ ext2_xattr_delete_inode(struct inode *inode)
le32_to_cpu(HDR(bh)->h_refcount));
unlock_buffer(bh);
mark_buffer_dirty(bh);
- if (IS_SYNC(inode))
+ if (IS_SYNC(inode)) {
sync_dirty_buffer(bh);
+ blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ }
DQUOT_FREE_BLOCK(inode, 1);
}
EXT2_I(inode)->i_file_acl = 0;
--
1.6.0.2


2009-01-14 17:36:30

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

Jan Kara wrote:
> To be really safe that the data hit the platter, we should also flush drive's
> writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.

Seems sane, but aren't we getting really divergent behavior here between
ext2, ext3, and ext4 w.r.t. drive cache flushing for sync paths?

(at least, ext3 has no calls to flush cache at this point)

-Eric

> Signed-off-by: Jan Kara <[email protected]>
> CC: [email protected]
> ---
> fs/ext2/dir.c | 5 ++++-
> fs/ext2/fsync.c | 7 +++++--
> fs/ext2/inode.c | 7 +++++++
> fs/ext2/xattr.c | 6 +++++-
> 4 files changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> index 2999d72..d6cb59f 100644
> --- a/fs/ext2/dir.c
> +++ b/fs/ext2/dir.c
> @@ -25,6 +25,7 @@
> #include <linux/buffer_head.h>
> #include <linux/pagemap.h>
> #include <linux/swap.h>
> +#include <linux/blkdev.h> /* for blkdev_issue_flush() */
>
> typedef struct ext2_dir_entry_2 ext2_dirent;
>
> @@ -97,8 +98,10 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
>
> if (IS_DIRSYNC(dir)) {
> err = write_one_page(page, 1);
> - if (!err)
> + if (!err) {
> err = ext2_sync_inode(dir);
> + blkdev_issue_flush(dir->i_sb->s_bdev, NULL);
> + }
> } else {
> unlock_page(page);
> }
> diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
> index fc66c93..9cd1838 100644
> --- a/fs/ext2/fsync.c
> +++ b/fs/ext2/fsync.c
> @@ -24,6 +24,7 @@
>
> #include "ext2.h"
> #include <linux/buffer_head.h> /* for sync_mapping_buffers() */
> +#include <linux/blkdev.h> /* for blkdev_issue_flush() */
>
>
> /*
> @@ -39,12 +40,14 @@ int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
>
> ret = sync_mapping_buffers(inode->i_mapping);
> if (!(inode->i_state & I_DIRTY))
> - return ret;
> + goto out;
> if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
> - return ret;
> + goto out;
>
> err = ext2_sync_inode(inode);
> if (ret == 0)
> ret = err;
> +out:
> + blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
> return ret;
> }
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 23fff2f..49b479e 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -33,6 +33,7 @@
> #include <linux/mpage.h>
> #include <linux/fiemap.h>
> #include <linux/namei.h>
> +#include <linux/blkdev.h> /* for blkdev_issue_flush() */
> #include "ext2.h"
> #include "acl.h"
> #include "xip.h"
> @@ -68,9 +69,14 @@ void ext2_delete_inode (struct inode * inode)
> mark_inode_dirty(inode);
> ext2_update_inode(inode, inode_needs_sync(inode));
>
> + /* Make sure inode deletion really gets to disk. Disk write caches
> + * are flushed either in ext2_truncate() or we do it explicitly */
> inode->i_size = 0;
> if (inode->i_blocks)
> ext2_truncate (inode);
> + else if (inode_needs_sync(inode))
> + blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
> +
> ext2_free_inode (inode);
>
> return;
> @@ -1104,6 +1110,7 @@ do_indirects:
> if (inode_needs_sync(inode)) {
> sync_mapping_buffers(inode->i_mapping);
> ext2_sync_inode (inode);
> + blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
> } else {
> mark_inode_dirty(inode);
> }
> diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
> index 987a526..d480216 100644
> --- a/fs/ext2/xattr.c
> +++ b/fs/ext2/xattr.c
> @@ -60,6 +60,7 @@
> #include <linux/mbcache.h>
> #include <linux/quotaops.h>
> #include <linux/rwsem.h>
> +#include <linux/blkdev.h> /* for blkdev_issue_flush() */
> #include "ext2.h"
> #include "xattr.h"
> #include "acl.h"
> @@ -702,6 +703,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
> DQUOT_FREE_BLOCK(inode, 1);
> goto cleanup;
> }
> + blkdev_issue_flush(sb->s_bdev, NULL);
> } else
> mark_inode_dirty(inode);
>
> @@ -792,8 +794,10 @@ ext2_xattr_delete_inode(struct inode *inode)
> le32_to_cpu(HDR(bh)->h_refcount));
> unlock_buffer(bh);
> mark_buffer_dirty(bh);
> - if (IS_SYNC(inode))
> + if (IS_SYNC(inode)) {
> sync_dirty_buffer(bh);
> + blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
> + }
> DQUOT_FREE_BLOCK(inode, 1);
> }
> EXT2_I(inode)->i_file_acl = 0;


2009-01-14 17:40:24

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed 14-01-09 11:35:50, Eric Sandeen wrote:
> Jan Kara wrote:
> > To be really safe that the data hit the platter, we should also flush drive's
> > writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.
>
> Seems sane, but aren't we getting really divergent behavior here between
> ext2, ext3, and ext4 w.r.t. drive cache flushing for sync paths?
Well, but ext3/4 should do a barrier on a transaction commit (if the user
really cares about data integrity) and hence it implicitely does the same.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2009-01-14 17:47:52

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

Jan Kara wrote:
> On Wed 14-01-09 11:35:50, Eric Sandeen wrote:
>> Jan Kara wrote:
>>> To be really safe that the data hit the platter, we should also flush drive's
>>> writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.
>> Seems sane, but aren't we getting really divergent behavior here between
>> ext2, ext3, and ext4 w.r.t. drive cache flushing for sync paths?
> Well, but ext3/4 should do a barrier on a transaction commit (if the user
> really cares about data integrity) and hence it implicitely does the same.
>
> Honza

Sorry, just now catching up with that other thread, which addresses this
topic. :) But yes, this seems correct... (OTOH, I thought SuSE was
carrying a patch which did add this blkdev_flush to sync for ext3...)

-Eric

2009-01-14 18:18:41

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed, 14 Jan 2009 16:12:28 +0100 Jan Kara <[email protected]> wrote:

> To be really safe that the data hit the platter, we should also flush drive's
> writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.
>

It's not good to randomly sprinkle blkdev_issue_flush() calls all over
the filesystem like this. How do we know that you didn't miss a site?
How do we ensure that people who modify the fs in the future don't
forget to add the blkdev_issue_flush() call, if needed?

IOW, it is fragile. Is there anything we can do to make this more
robust? Do the flush calls from some higher-level callsite? Perhaps
even the VFS?

> @@ -97,8 +98,10 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
>
> if (IS_DIRSYNC(dir)) {
> err = write_one_page(page, 1);
> - if (!err)
> + if (!err) {
> err = ext2_sync_inode(dir);
> + blkdev_issue_flush(dir->i_sb->s_bdev, NULL);

The patch itself would have been a bit neater if it had added

int ext3_blkdev_issue_flush(struct inode *inode)

and called that, IMO.


Also, the changelog needs some work, methinks.

/**
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
* @error_sector: error sector
*
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
* wish to. Caller must run wait_for_completion() on its own.
*/

So afaict the change you've made is incomplete. We'll queue a
writeback command to the disk but we won't wait for it to be sent down
the wire. Nor do we wait for the command to complete at the device
end. So it can still be a looong time (seconds!) before the data which
the user thinks is on disk really is safe.

Yes?

If so, this design decision should be described in the changelog, and
justified. Actually, doing this in the comment over
ext3_blkdev_issue_flush() would be good.



2009-01-14 18:27:16

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

Hi!

> Also, the changelog needs some work, methinks.
>
> /**
> * blkdev_issue_flush - queue a flush
> * @bdev: blockdev to issue flush for
> * @error_sector: error sector
> *
> * Description:
> * Issue a flush for the block device in question. Caller can supply
> * room for storing the error offset in case of a flush error, if they
> * wish to. Caller must run wait_for_completion() on its own.
> */
>
> So afaict the change you've made is incomplete. We'll queue a
> writeback command to the disk but we won't wait for it to be sent down
> the wire. Nor do we wait for the command to complete at the device
> end. So it can still be a looong time (seconds!) before the data which
> the user thinks is on disk really is safe.
>
> Yes?
>
> If so, this design decision should be described in the changelog, and
> justified. Actually, doing this in the comment over
> ext3_blkdev_issue_flush() would be good.

Actually, if sync() and fsync() do not work, it probably needs more
than a comment in changelog. Like comment in documentation, in big
bold letters.
Pavel

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2009-01-14 22:05:11

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed, Jan 14, 2009 at 10:18:34AM -0800, Andrew Morton wrote:
> /**
> * blkdev_issue_flush - queue a flush
> * @bdev: blockdev to issue flush for
> * @error_sector: error sector
> *
> * Description:
> * Issue a flush for the block device in question. Caller can supply
> * room for storing the error offset in case of a flush error, if they
> * wish to. Caller must run wait_for_completion() on its own.
> */
>
> So afaict the change you've made is incomplete. We'll queue a
> writeback command to the disk but we won't wait for it to be sent down
> the wire.

No, that last part of the comment is stale, and it's already been
confirmed by Jens. He's queued in the block tree:

>From 801d773a6ff5dbb37c9eaa4b89ae3fc6574ba294 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <[email protected]>
Date: Tue, 13 Jan 2009 09:25:08 -0500
Subject: [PATCH] block: Fix documentation for blkdev_issue_flush()

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
block/blk-barrier.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8eba4e4..f7dae57 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -302,7 +302,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
- * wish to. Caller must run wait_for_completion() on its own.
+ * wish to.
*/
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
--
1.6.0.4.8.g36f27.dirty

- Ted

2009-01-14 22:09:13

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed, Jan 14, 2009 at 10:18:34AM -0800, Andrew Morton wrote:
> On Wed, 14 Jan 2009 16:12:28 +0100 Jan Kara <[email protected]> wrote:
>
> > To be really safe that the data hit the platter, we should also flush drive's
> > writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.
> >
>
> It's not good to randomly sprinkle blkdev_issue_flush() calls all over
> the filesystem like this. How do we know that you didn't miss a site?
> How do we ensure that people who modify the fs in the future don't
> forget to add the blkdev_issue_flush() call, if needed?
>
> IOW, it is fragile. Is there anything we can do to make this more
> robust? Do the flush calls from some higher-level callsite? Perhaps
> even the VFS?

The problem with doing it in the VFS is that we might end up calling
flush twice; it may very well be that for some filesystems, we'll be
calling the flush operation as part of writing out the commit block,
for example.

I'm not sure it's fair to say that we need to "randomly sprinkle
blkdev_issue_flush() calls all over the place". The number of places
where we need to do O_SYNC or fsync() handling is in fact quite small.
We were simply ignoring this problem before.

> > + blkdev_issue_flush(dir->i_sb->s_bdev, NULL);
>
> The patch itself would have been a bit neater if it had added
>
> int ext3_blkdev_issue_flush(struct inode *inode)

What, just to avoid needing the "..->i_sb->s_bdev" construction? I
personally find extra levels of inline functions to be harder to deal
with long term, since I then I have to track down the definition of
ext3_blkdev_issue_flush in the header files to make sure there isn't
any other magic hiding in there other than the i->i_sb->s_bdev
derefencing.

- Ted

2009-01-14 22:18:26

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed, 14 Jan 2009 17:04:18 -0500
Theodore Tso <[email protected]> wrote:

> On Wed, Jan 14, 2009 at 10:18:34AM -0800, Andrew Morton wrote:
> > /**
> > * blkdev_issue_flush - queue a flush
> > * @bdev: blockdev to issue flush for
> > * @error_sector: error sector
> > *
> > * Description:
> > * Issue a flush for the block device in question. Caller can supply
> > * room for storing the error offset in case of a flush error, if they
> > * wish to. Caller must run wait_for_completion() on its own.
> > */
> >
> > So afaict the change you've made is incomplete. We'll queue a
> > writeback command to the disk but we won't wait for it to be sent down
> > the wire.
>
> No, that last part of the comment is stale, and it's already been
> confirmed by Jens. He's queued in the block tree:
>
> >From 801d773a6ff5dbb37c9eaa4b89ae3fc6574ba294 Mon Sep 17 00:00:00 2001
> From: Theodore Ts'o <[email protected]>
> Date: Tue, 13 Jan 2009 09:25:08 -0500
> Subject: [PATCH] block: Fix documentation for blkdev_issue_flush()
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
> block/blk-barrier.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/block/blk-barrier.c b/block/blk-barrier.c
> index 8eba4e4..f7dae57 100644
> --- a/block/blk-barrier.c
> +++ b/block/blk-barrier.c
> @@ -302,7 +302,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
> * Description:
> * Issue a flush for the block device in question. Caller can supply
> * room for storing the error offset in case of a flush error, if they
> - * wish to. Caller must run wait_for_completion() on its own.
> + * wish to.
> */
> int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
> {

<glances at the code>

<doh>

The function's name is now inappropriate.

2009-01-14 22:26:33

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext2: Add blk_issue_flush() to syncing paths

On Wed, 14 Jan 2009 17:09:00 -0500
Theodore Tso <[email protected]> wrote:

> On Wed, Jan 14, 2009 at 10:18:34AM -0800, Andrew Morton wrote:
> > On Wed, 14 Jan 2009 16:12:28 +0100 Jan Kara <[email protected]> wrote:
> >
> > > To be really safe that the data hit the platter, we should also flush drive's
> > > writeback caches on fsync and for O_SYNC files or O_DIRSYNC inodes.
> > >
> >
> > It's not good to randomly sprinkle blkdev_issue_flush() calls all over
> > the filesystem like this. How do we know that you didn't miss a site?
> > How do we ensure that people who modify the fs in the future don't
> > forget to add the blkdev_issue_flush() call, if needed?
> >
> > IOW, it is fragile. Is there anything we can do to make this more
> > robust? Do the flush calls from some higher-level callsite? Perhaps
> > even the VFS?
>
> The problem with doing it in the VFS is that we might end up calling
> flush twice; it may very well be that for some filesystems, we'll be
> calling the flush operation as part of writing out the commit block,
> for example.

It depends how it's done. One way would be via a new
address_space_operations entry.

> I'm not sure it's fair to say that we need to "randomly sprinkle
> blkdev_issue_flush() calls all over the place". The number of places
> where we need to do O_SYNC or fsync() handling is in fact quite small.
> We were simply ignoring this problem before.

Well it _does_ sprinkle them all over.

> > > + blkdev_issue_flush(dir->i_sb->s_bdev, NULL);
> >
> > The patch itself would have been a bit neater if it had added
> >
> > int ext3_blkdev_issue_flush(struct inode *inode)
>
> What, just to avoid needing the "..->i_sb->s_bdev" construction?

Partly as a cleanup, yes. But such a function then becomes a site
where we can add commentary explaining the reason for its existence,
along with commentary which describes the semantics in details,
exceptions, caveats, etc.

It also becomes a single site where the feature can be disabled for
those who wish to evaluate its performance cost.

The single site can also be used for convenient gathering of timing
information.

One thing we may well choose to do is to stop issuing the flush if
blkdev_issue_flush() is returning -EOPNOTSUPP, which some devices will
presumably do. Reissuing a pointless and quite expensive command over
and over again doesn't seem clever.

And I can probably think of more reasons :)


> I
> personally find extra levels of inline functions

It shouldn't be inlined.

> to be harder to deal
> with long term, since I then I have to track down the definition of
> ext3_blkdev_issue_flush in the header files to make sure there isn't
> any other magic hiding in there other than the i->i_sb->s_bdev
> derefencing.
>
> - Ted