Before 2.6.37, the md layer had a mechanism for catching I/Os with the barrier
flag set, and translating the barrier into barriers for all the underlying
devices. With 2.6.37, I/O barriers have become plain old flushes, and the md
code was updated to reflect this. However, one piece was left out -- the md
layer does not tell the block layer that it supports flushes or FUA access at
all, which results in md silently dropping flush requests.

Since the support already seems to be there, just add this one piece of
bookkeeping to restore the ability to flush writes through md.

Signed-off-by: Darrick J. Wong <[email protected]>
---

 drivers/md/md.c |   25 ++++++++++++++++++++++++-
 1 files changed, 24 insertions(+), 1 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 324a366..a52d7be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -356,6 +356,21 @@ EXPORT_SYMBOL(mddev_congested);
/*
* Generic flush handling for md
*/
+static void evaluate_flush_capability(mddev_t *mddev)
+{
+ mdk_rdev_t *rdev;
+ unsigned int flush = REQ_FLUSH | REQ_FUA;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &mddev->disks, same_set) {
+ if (rdev->raid_disk < 0)
+ continue;
+ flush &= rdev->bdev->bd_disk->queue->flush_flags;
+ }
+ rcu_read_unlock();
+
+ blk_queue_flush(mddev->queue, flush);
+}

static void md_end_flush(struct bio *bio, int err)
{
@@ -1885,6 +1900,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;

+ evaluate_flush_capability(mddev);
+
return 0;

fail:
@@ -1903,17 +1920,23 @@ static void md_delayed_delete(struct work_struct *ws)
static void unbind_rdev_from_array(mdk_rdev_t * rdev)
{
char b[BDEVNAME_SIZE];
+ mddev_t *mddev;
+
if (!rdev->mddev) {
MD_BUG();
return;
}
- bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
+ mddev = rdev->mddev;
+ bd_release_from_disk(rdev->bdev, mddev->gendisk);
list_del_rcu(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
rdev->sysfs_state = NULL;
+
+ evaluate_flush_capability(mddev);
+
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
* to delay it due to rcu usage.
On Mon, 22 Nov 2010 15:22:08 -0800
"Darrick J. Wong" <[email protected]> wrote:
> Before 2.6.37, the md layer had a mechanism for catching I/Os with the barrier
> flag set, and translating the barrier into barriers for all the underlying
> devices. With 2.6.37, I/O barriers have become plain old flushes, and the md
> code was updated to reflect this. However, one piece was left out -- the md
> layer does not tell the block layer that it supports flushes or FUA access at
> all, which results in md silently dropping flush requests.
>
> Since the support already seems to be there, just add this one piece of bookkeeping
> to restore the ability to flush writes through md.

I would rather just unconditionally call

  blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);

I don't think there is much to be gained by trying to track exactly what the
underlying devices support, and as the devices can change, that is racy
anyway.

Thoughts?

NeilBrown
On Tue, Nov 23, 2010 at 10:50:00AM +1100, Neil Brown wrote:
> I would rather just unconditionally call
> blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
>
> I don't think there is much to be gained by trying to track exactly what the
> underlying devices support, and as the devices can change, that is racy
> anyway.
>
> Thoughts?
I don't think anything would get confused by an md device that advertises
flush/fua support when some of the underlying devices don't, but that concern
was the only reason I didn't just code it up your way to start with. :)
None of the in-kernel code checks queue->flush_flags except the block layer
itself, and the block layer silently strips off REQ_FLUSH/REQ_FUA if the device
doesn't support it. I'm not sure I like /that/ behavior, but at the moment I
have no objection. dm hardcodes the flags on as well.
--D
Here's a shorter version that sets up flush/fua unconditionally.
---
Before 2.6.37, the md layer had a mechanism for catching I/Os with the barrier
flag set, and translating the barrier into barriers for all the underlying
devices. With 2.6.37, I/O barriers have become plain old flushes, and the md
code was updated to reflect this. However, one piece was left out -- the md
layer does not tell the block layer that it supports flushes or FUA access at
all, which results in md silently dropping flush requests.

Since the support already seems to be there, just add this one piece of
bookkeeping.

Signed-off-by: Darrick J. Wong <[email protected]>
---

 drivers/md/md.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 324a366..43243a4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4338,6 +4338,8 @@ static int md_alloc(dev_t dev, char *name)
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
printk(KERN_DEBUG "pointless warning\n");
+
+ blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
abort:
mutex_unlock(&disks_mutex);
if (!error && mddev->kobj.sd) {
On Tue, 23 Nov 2010 11:01:03 -0800
"Darrick J. Wong" <[email protected]> wrote:
> Here's a shorter version that sets up flush/fua unconditionally.
Applied, thanks.
NeilBrown