2010-04-12 13:04:13

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v2

- Convert all blkdev_issue_xxx function to common set of flags
- move common functions to block/blk-lib.c
- Add generic zeroout helper

changes from V1
Christoph is about to change discard memory payload allocation logic
so discard cleanups are no longer necessary.


2010-04-12 13:04:19

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 1/4] blkdev: pass gfp_mask and flags to blkdev_issue_flush

In some places caller don't want to wait a request to complete.
Flags make blkdev_issue_flush() more flexible. This patch just
convert existing callers to new interface without chaining
existing allocation/wait behavior.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-barrier.c | 43 +++++++++++++++++++++---------------
drivers/block/drbd/drbd_int.h | 3 +-
drivers/block/drbd/drbd_receiver.c | 3 +-
fs/block_dev.c | 3 +-
fs/ext3/fsync.c | 3 +-
fs/ext4/fsync.c | 6 +++-
fs/jbd2/checkpoint.c | 3 +-
fs/jbd2/commit.c | 6 +++-
fs/reiserfs/file.c | 3 +-
fs/xfs/linux-2.6/xfs_super.c | 3 +-
include/linux/blkdev.h | 10 ++++++-
11 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8618d89..7e6e810 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -285,26 +285,31 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
-
- complete(bio->bi_private);
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ bio_put(bio);
}

/**
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
+ * @gfp_mask: memory allocation flags (for bio_alloc)
* @error_sector: error sector
+ * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
- * wish to.
+ * wish to. If WAIT flag is not passed then caller may check only what
+ * request was pushed in some internal queue for later handling.
*/
-int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+ sector_t *error_sector, unsigned long flags)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q;
struct bio *bio;
- int ret;
+ int ret = 0;

if (bdev->bd_disk == NULL)
return -ENXIO;
@@ -313,23 +318,25 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
if (!q)
return -ENXIO;

- bio = bio_alloc(GFP_KERNEL, 0);
+ bio = bio_alloc(gfp_mask, 0);
bio->bi_end_io = bio_end_empty_barrier;
- bio->bi_private = &wait;
bio->bi_bdev = bdev;
- submit_bio(WRITE_BARRIER, bio);
+ if (test_bit(__BLKDEV_IFL_WAIT, &flags))
+ bio->bi_private = &wait;

- wait_for_completion(&wait);
-
- /*
- * The driver must store the error location in ->bi_sector, if
- * it supports it. For non-stacked drivers, this should be copied
- * from blk_rq_pos(rq).
- */
- if (error_sector)
- *error_sector = bio->bi_sector;
+ bio_get(bio);
+ submit_bio(WRITE_BARRIER, bio);
+ if (test_bit(__BLKDEV_IFL_WAIT, &flags)) {
+ wait_for_completion(&wait);
+ /*
+ * The driver must store the error location in ->bi_sector, if
+ * it supports it. For non-stacked drivers, this should be
+ * copied from blk_rq_pos(rq).
+ */
+ if (error_sector)
+ *error_sector = bio->bi_sector;
+ }

- ret = 0;
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e5e86a7..d6f1ae3 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -2251,7 +2251,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
if (test_bit(MD_NO_BARRIER, &mdev->flags))
return;

- r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (r) {
set_bit(MD_NO_BARRIER, &mdev->flags);
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ed9f1de..54f56ea 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -945,7 +945,8 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
int rv;

if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
- rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
+ rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
if (rv) {
dev_err(DEV, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable.
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d11d028..b1c8062 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -413,7 +413,8 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
if (error)
return error;

- error = blkdev_issue_flush(bdev, NULL);
+ error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL,
+ (BLKDEV_IFL_WAIT));
if (error == -EOPNOTSUPP)
error = 0;
return error;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 8209f26..9492f60 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -91,7 +91,8 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
* storage
*/
if (test_opt(inode->i_sb, BARRIER))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
out:
return ret;
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0d0c323..ef3d980 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (ext4_should_writeback_data(inode) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
return ret;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 30beb11..076d1cc 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
*/
if ((journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 671da7f..75716d3 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -717,7 +717,8 @@ start_journal_io:
if (commit_transaction->t_flushed_data_blocks &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);

/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -727,7 +728,8 @@ start_journal_io:
if (err)
__jbd2_journal_abort_hard(journal);
if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_dev, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}

err = journal_finish_inode_data_buffers(journal, commit_transaction);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1d9c127..9977df9 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp,
barrier_done = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 71345a3..a98f35e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -724,7 +724,8 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}

STATIC void
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6690e8b..f669656 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -994,8 +994,14 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
return NULL;
return bqt->tag_index[tag];
}
-
-extern int blkdev_issue_flush(struct block_device *, sector_t *);
+enum{
+ __BLKDEV_IFL_WAIT, /* wait for completion */
+ __BLKDEV_IFL_BARRIER, /*issue request with barrier */
+};
+#define BLKDEV_IFL_WAIT (1 << __BLKDEV_IFL_WAIT)
+#define BLKDEV_IFL_BARRIER (1 << __BLKDEV_IFL_BARRIER)
+extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
+ unsigned long);
#define DISCARD_FL_WAIT 0x01 /* wait for completion */
#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */
extern int blkdev_issue_discard(struct block_device *, sector_t sector,
--
1.6.6.1

2010-04-12 13:04:21

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 2/4] blkdev: generalize flags for blkdev_issue_fn functions

The patch just convert all blkdev_issue_xxx function to common
flags. Wait/allocation semantics not changed.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-barrier.c | 10 +++++-----
block/ioctl.c | 2 +-
fs/btrfs/extent-tree.c | 2 +-
fs/gfs2/rgrp.c | 5 +++--
include/linux/blkdev.h | 8 +++-----
mm/swapfile.c | 8 +++++---
6 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 7e6e810..56254b1 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -368,17 +368,17 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: DISCARD_FL_* flags to control behaviour
+ * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags)
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
- int type = flags & DISCARD_FL_BARRIER ?
+ int type = flags & BLKDEV_IFL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
struct bio *bio;
struct page *page;
@@ -401,7 +401,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
- if (flags & DISCARD_FL_WAIT)
+ if (flags & BLKDEV_IFL_BARRIER)
bio->bi_private = &wait;

/*
@@ -432,7 +432,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_get(bio);
submit_bio(type, bio);

- if (flags & DISCARD_FL_WAIT)
+ if (flags & BLKDEV_IFL_BARRIER)
wait_for_completion(&wait);

if (bio_flagged(bio, BIO_EOPNOTSUPP))
diff --git a/block/ioctl.c b/block/ioctl.c
index be48ea5..d803cac 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -125,7 +125,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
if (start + len > (bdev->bd_inode->i_size >> 9))
return -EINVAL;
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
- DISCARD_FL_WAIT);
+ BLKDEV_IFL_WAIT);
}

static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 559f724..7453ac4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1588,7 +1588,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 503b842..bf011dc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
if ((start + nr_sects) != blk) {
rv = blkdev_issue_discard(bdev, start,
nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT |
+ BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
nr_sects = 0;
@@ -869,7 +870,7 @@ start_new_extent:
}
if (nr_sects) {
rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f669656..a222351 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1002,10 +1002,8 @@ enum{
#define BLKDEV_IFL_BARRIER (1 << __BLKDEV_IFL_BARRIER)
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
unsigned long);
-#define DISCARD_FL_WAIT 0x01 /* wait for completion */
-#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */
-extern int blkdev_issue_discard(struct block_device *, sector_t sector,
- sector_t nr_sects, gfp_t, int flags);
+extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);

static inline int sb_issue_discard(struct super_block *sb,
sector_t block, sector_t nr_blocks)
@@ -1013,7 +1011,7 @@ static inline int sb_issue_discard(struct super_block *sb,
block <<= (sb->s_blocksize_bits - 9);
nr_blocks <<= (sb->s_blocksize_bits - 9);
return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}

extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6cd0a8f..4a16fad 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -139,7 +139,8 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
if (nr_blocks) {
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+ nr_blocks, GFP_KERNEL,
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (err)
return err;
cond_resched();
@@ -150,7 +151,8 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+ nr_blocks, GFP_KERNEL,
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (err)
break;

@@ -189,7 +191,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
start_block <<= PAGE_SHIFT - 9;
nr_blocks <<= PAGE_SHIFT - 9;
if (blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_NOIO, DISCARD_FL_BARRIER))
+ nr_blocks, GFP_NOIO, BLKDEV_IFL_BARRIER))
break;
}

--
1.6.6.1

2010-04-12 13:04:40

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 3/4] blkdev: add blkdev_issue helper functions

Move blkdev_issue_discard from blk-barrier.c because it is not barrier
related.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/Makefile | 2 +-
block/blk-barrier.c | 103 ----------------------------------------------
block/blk-lib.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+), 104 deletions(-)
create mode 100644 block/blk-lib.c

diff --git a/block/Makefile b/block/Makefile
index cb2d515..0bb499a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
+ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o

obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 56254b1..1242bdd 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -347,106 +347,3 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);

-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
- if (err) {
- if (err == -EOPNOTSUPP)
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- }
-
- if (bio->bi_private)
- complete(bio->bi_private);
- __free_page(bio_page(bio));
-
- bio_put(bio);
-}
-
-/**
- * blkdev_issue_discard - queue a discard
- * @bdev: blockdev to issue discard for
- * @sector: start sector
- * @nr_sects: number of sectors to discard
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- * Issue a discard request for the sectors in question.
- */
-int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- struct request_queue *q = bdev_get_queue(bdev);
- int type = flags & BLKDEV_IFL_BARRIER ?
- DISCARD_BARRIER : DISCARD_NOBARRIER;
- struct bio *bio;
- struct page *page;
- int ret = 0;
-
- if (!q)
- return -ENXIO;
-
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
-
- while (nr_sects && !ret) {
- unsigned int sector_size = q->limits.logical_block_size;
- unsigned int max_discard_sectors =
- min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-
- bio = bio_alloc(gfp_mask, 1);
- if (!bio)
- goto out;
- bio->bi_sector = sector;
- bio->bi_end_io = blkdev_discard_end_io;
- bio->bi_bdev = bdev;
- if (flags & BLKDEV_IFL_BARRIER)
- bio->bi_private = &wait;
-
- /*
- * Add a zeroed one-sector payload as that's what
- * our current implementations need. If we'll ever need
- * more the interface will need revisiting.
- */
- page = alloc_page(gfp_mask | __GFP_ZERO);
- if (!page)
- goto out_free_bio;
- if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
- goto out_free_page;
-
- /*
- * And override the bio size - the way discard works we
- * touch many more blocks on disk than the actual payload
- * length.
- */
- if (nr_sects > max_discard_sectors) {
- bio->bi_size = max_discard_sectors << 9;
- nr_sects -= max_discard_sectors;
- sector += max_discard_sectors;
- } else {
- bio->bi_size = nr_sects << 9;
- nr_sects = 0;
- }
-
- bio_get(bio);
- submit_bio(type, bio);
-
- if (flags & BLKDEV_IFL_BARRIER)
- wait_for_completion(&wait);
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
- else if (!bio_flagged(bio, BIO_UPTODATE))
- ret = -EIO;
- bio_put(bio);
- }
- return ret;
-out_free_page:
- __free_page(page);
-out_free_bio:
- bio_put(bio);
-out:
- return -ENOMEM;
-}
-EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-lib.c b/block/blk-lib.c
new file mode 100644
index 0000000..d7827e5
--- /dev/null
+++ b/block/blk-lib.c
@@ -0,0 +1,114 @@
+/*
+ * Functions related to generic helpers functions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ }
+
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ __free_page(bio_page(bio));
+
+ bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev: blockdev to issue discard for
+ * @sector: start sector
+ * @nr_sects: number of sectors to discard
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ * Issue a discard request for the sectors in question.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q = bdev_get_queue(bdev);
+ int type = flags & BLKDEV_IFL_BARRIER ?
+ DISCARD_BARRIER : DISCARD_NOBARRIER;
+ struct bio *bio;
+ struct page *page;
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ while (nr_sects && !ret) {
+ unsigned int sector_size = q->limits.logical_block_size;
+ unsigned int max_discard_sectors =
+ min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ goto out;
+ bio->bi_sector = sector;
+ bio->bi_end_io = blkdev_discard_end_io;
+ bio->bi_bdev = bdev;
+ if (flags & BLKDEV_IFL_BARRIER)
+ bio->bi_private = &wait;
+
+ /*
+ * Add a zeroed one-sector payload as that's what
+ * our current implementations need. If we'll ever need
+ * more the interface will need revisiting.
+ */
+ page = alloc_page(gfp_mask | __GFP_ZERO);
+ if (!page)
+ goto out_free_bio;
+ if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+ goto out_free_page;
+
+ /*
+ * And override the bio size - the way discard works we
+ * touch many more blocks on disk than the actual payload
+ * length.
+ */
+ if (nr_sects > max_discard_sectors) {
+ bio->bi_size = max_discard_sectors << 9;
+ nr_sects -= max_discard_sectors;
+ sector += max_discard_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+
+ bio_get(bio);
+ submit_bio(type, bio);
+
+ if (flags & BLKDEV_IFL_BARRIER)
+ wait_for_completion(&wait);
+
+ if (bio_flagged(bio, BIO_EOPNOTSUPP))
+ ret = -EOPNOTSUPP;
+ else if (!bio_flagged(bio, BIO_UPTODATE))
+ ret = -EIO;
+ bio_put(bio);
+ }
+ return ret;
+out_free_page:
+ __free_page(page);
+out_free_bio:
+ bio_put(bio);
+out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
--
1.6.6.1

2010-04-12 13:04:39

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 4/4] patch blk-add-zeroout-helper.patch


Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-lib.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 3 +-
2 files changed, 120 insertions(+), 1 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index d7827e5..fa36030 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -112,3 +112,121 @@ out:
return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
+
+struct bio_batch
+{
+ atomic_t done;
+ unsigned long flags;
+ struct completion *wait;
+ bio_end_io_t *end_io;
+};
+
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+ struct bio_batch *bb = bio->bi_private;
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bb->flags);
+ else
+ clear_bit(BIO_UPTODATE, &bb->flags);
+ }
+ if (bb) {
+ if (bb->end_io)
+ bb->end_io(bio, err);
+ atomic_inc(&bb->done);
+ complete(bb->wait);
+ }
+ bio_put(bio);
+}
+
+/**
+ * __blkdev_issue_zeroout generate number of zero filed write bios
+ * @bdev: blockdev to issue
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ * Generate and issue number of bios with zerofiled pages.
+ * Send barrier at the beginning and at the end if requested. This guarantie
+ * correct request ordering. Empty barrier allow us to avoid post queue flush.
+ */
+
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+ int ret = 0;
+ struct bio *bio;
+ struct bio_batch bb;
+ unsigned int sz, issued = 0;
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ atomic_set(&bb.done, 0);
+ bb.flags = 1 << BIO_UPTODATE;
+ bb.wait = &wait;
+ bb.end_io = NULL;
+
+ if (flags & BLKDEV_IFL_BARRIER) {
+ /* issue async barrier before the data */
+ ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
+ if (ret)
+ return ret;
+ }
+submit:
+ while (nr_sects != 0) {
+ bio = bio_alloc(gfp_mask,
+ min(nr_sects, (sector_t)BIO_MAX_PAGES));
+ if (!bio)
+ break;
+
+ bio->bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = bio_batch_end_io;
+ if (flags & BLKDEV_IFL_WAIT)
+ bio->bi_private = &bb;
+
+ while(nr_sects != 0) {
+ sz = min(PAGE_SIZE >> 9 , nr_sects);
+ if (sz == 0)
+ /* bio has maximum size possible */
+ break;
+ ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+ nr_sects -= ret >> 9;
+ sector += ret >> 9;
+ if (ret < (sz << 9))
+ break;
+ }
+ issued++;
+ submit_bio(WRITE, bio);
+ }
+ /*
+ * When all data bios are in flight. Send final barrier if requeted.
+ */
+ if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
+ ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
+ flags & BLKDEV_IFL_WAIT);
+
+
+ if (flags & BLKDEV_IFL_WAIT)
+ /* Wait for bios in-flight */
+ while ( issued != atomic_read(&bb.done))
+ wait_for_completion(&wait);
+
+ if (!test_bit(BIO_UPTODATE, &bb.flags))
+ /* One of bios in the batch was completed with error.*/
+ ret = -EIO;
+
+ if (ret)
+ goto out;
+
+ if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ if (nr_sects != 0)
+ goto submit;
+out:
+ return ret;
+}
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a222351..787edbb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1004,7 +1004,8 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
unsigned long);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-
+extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
static inline int sb_issue_discard(struct super_block *sb,
sector_t block, sector_t nr_blocks)
{
--
1.6.6.1

2010-04-13 18:05:12

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/4] blkdev: generalize flags for blkdev_issue_fn functions

On Mon, Apr 12, 2010 at 05:03:55PM +0400, Dmitry Monakhov wrote:
> The patch just convert all blkdev_issue_xxx function to common
> flags. Wait/allocation semantics not changed.
>
> Signed-off-by: Dmitry Monakhov <[email protected]>

Looks good in general. But I would re-order this to be patch 1 so you
can avoid introducing flags for blkdev_issue_flush just to replace them
in the next one.

2010-04-13 18:06:11

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 3/4] blkdev: add blkdev_issue helper functions

On Mon, Apr 12, 2010 at 05:03:56PM +0400, Dmitry Monakhov wrote:
> Move blkdev_issue_discard from blk-barrier.c because it is not barrier
> related.

If you create a new file just for discard I would call it blk-discard.c.
But I can't really be bothered too much about the placement of this two
functions, so do whatever is fine with you and Jens.

2010-04-13 18:07:32

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 4/4] patch blk-add-zeroout-helper.patch

Looks okayish to me, but it really needs:

a) a useful patch description
b) patches that introduce actual users

Also why does the function have a __ prefix in the name?

2010-04-13 18:08:38

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/4] blkdev: pass gfp_mask and flags to blkdev_issue_flush

On Mon, Apr 12, 2010 at 05:03:54PM +0400, Dmitry Monakhov wrote:
> In some places caller don't want to wait a request to complete.
> Flags make blkdev_issue_flush() more flexible. This patch just
> convert existing callers to new interface without chaining
> existing allocation/wait behavior.

Looks okay to me, but as mention in the other review I would introduce
the comment flags first and then add the flags to blkdev_issue_flush,
not the other way around.

2010-04-13 18:10:25

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/4] blkdev: pass gfp_mask and flags to blkdev_issue_flush

> +enum{
> + __BLKDEV_IFL_WAIT, /* wait for completion */
> + __BLKDEV_IFL_BARRIER, /*issue request with barrier */
> +};
> +#define BLKDEV_IFL_WAIT (1 << __BLKDEV_IFL_WAIT)
> +#define BLKDEV_IFL_BARRIER (1 << __BLKDEV_IFL_BARRIER)

This is a very awkward stayle to define flags. There really should
be no need for the __-prefixed version. While you're using them for
test/set_bit and co there's no reason to use these atomic bitops here.

2010-04-14 06:19:16

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH 1/4] blkdev: pass gfp_mask and flags to blkdev_issue_flush

Christoph Hellwig <[email protected]> writes:

>> +enum{
>> + __BLKDEV_IFL_WAIT, /* wait for completion */
>> + __BLKDEV_IFL_BARRIER, /*issue request with barrier */
>> +};
>> +#define BLKDEV_IFL_WAIT (1 << __BLKDEV_IFL_WAIT)
>> +#define BLKDEV_IFL_BARRIER (1 << __BLKDEV_IFL_BARRIER)
>
> This is a very awkward stayle to define flags. There really should
> be no need for the __-prefixed version. While you're using them for
> test/set_bit and co there's no reason to use these atomic bitops here.
I need both bit_num(used inside function) and flag (1<<bit_num)
which is used by function caller.
No problem, i'll change it whenever you like
do you like following?
enum{
IFN_BLKDEV_WAIT, /* wait for completion */
IFN_BLKDEV_BARRIER, /*issue request with barrier */
};
#define BLKDEV_WAIT (1 << IFN_BLKDEV_WAIT)
#define BLKDEV_BARRIER (1 << IFN_BLKDEV_BARRIER)

2010-04-26 15:01:47

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v2

Jens, are you planning on picking up a version of this?

On Mon, Apr 12, 2010 at 05:03:53PM +0400, Dmitry Monakhov wrote:
> - Convert all blkdev_issue_xxx function to common set of flags
> - move common functions to block/blk-lib.c
> - Add generic zeroout helper
>
> changes from V1
> Christoph is about to change discard memory payload allocation logic
> so discard cleanups are no longer necessary.
---end quoted text---

2010-04-26 17:26:53

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v2

On Mon, Apr 26 2010, Christoph Hellwig wrote:
> Jens, are you planning on picking up a version of this?

Yes, I was waiting for a repost to address your review concerns.

>
> On Mon, Apr 12, 2010 at 05:03:53PM +0400, Dmitry Monakhov wrote:
> > - Convert all blkdev_issue_xxx function to common set of flags
> > - move common functions to block/blk-lib.c
> > - Add generic zeroout helper
> >
> > changes from V1
> > Christoph is about to change discard memory payload allocation logic
> > so discard cleanups are no longer necessary.
> ---end quoted text---

--
Jens Axboe

2010-04-26 17:32:33

by Dmitry Monakhov

[permalink] [raw]
Subject: Re: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v2

Jens Axboe <[email protected]> writes:

> On Mon, Apr 26 2010, Christoph Hellwig wrote:
>> Jens, are you planning on picking up a version of this?
>
> Yes, I was waiting for a repost to address your review concerns.
Ok, will appear in several hours. I have to repeat full testing
cycle, I don't want to break lvm one more time :)
>
>>
>> On Mon, Apr 12, 2010 at 05:03:53PM +0400, Dmitry Monakhov wrote:
>> > - Convert all blkdev_issue_xxx function to common set of flags
>> > - move common functions to block/blk-lib.c
>> > - Add generic zeroout helper
>> >
>> > changes from V1
>> > Christoph is about to change discard memory payload allocation logic
>> > so discard cleanups are no longer necessary.
>> ---end quoted text---

2010-04-26 17:47:25

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v2

On Mon, Apr 26 2010, Dmitry Monakhov wrote:
> Jens Axboe <[email protected]> writes:
>
> > On Mon, Apr 26 2010, Christoph Hellwig wrote:
> >> Jens, are you planning on picking up a version of this?
> >
> > Yes, I was waiting for a repost to address your review concerns.
> Ok, will appear in several hours. I have to repeat full testing
> cycle, I don't want to break lvm one more time :)

Take your time, I would appreciate it if lvm didn't get broken again :-)

--
Jens Axboe

2010-04-28 13:55:49

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 1/4] blkdev: generalize flags for blkdev_issue_fn functions

The patch just convert all blkdev_issue_xxx function to common
set of flags. Wait/allocation semantics preserved.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-barrier.c | 20 +++++++++++---------
block/ioctl.c | 2 +-
drivers/block/drbd/drbd_int.h | 3 ++-
drivers/block/drbd/drbd_receiver.c | 3 ++-
fs/block_dev.c | 3 ++-
fs/btrfs/extent-tree.c | 2 +-
fs/ext3/fsync.c | 3 ++-
fs/ext4/fsync.c | 6 ++++--
fs/gfs2/rgrp.c | 5 +++--
fs/jbd2/checkpoint.c | 3 ++-
fs/jbd2/commit.c | 6 ++++--
fs/reiserfs/file.c | 3 ++-
fs/xfs/linux-2.6/xfs_super.c | 3 ++-
include/linux/blkdev.h | 18 +++++++++++-------
mm/swapfile.c | 9 ++++++---
15 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6d88544..cf14311 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -293,19 +293,22 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
/**
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
+ * @gfp_mask: memory allocation flags (for bio_alloc)
* @error_sector: error sector
+ * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
* wish to.
*/
-int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+ sector_t *error_sector, unsigned long flags)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q;
struct bio *bio;
- int ret;
+ int ret = 0;

if (bdev->bd_disk == NULL)
return -ENXIO;
@@ -314,7 +317,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
if (!q)
return -ENXIO;

- bio = bio_alloc(GFP_KERNEL, 0);
+ bio = bio_alloc(gfp_mask, 0);
bio->bi_end_io = bio_end_empty_barrier;
bio->bi_private = &wait;
bio->bi_bdev = bdev;
@@ -330,7 +333,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
if (error_sector)
*error_sector = bio->bi_sector;

- ret = 0;
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
@@ -362,17 +364,17 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: DISCARD_FL_* flags to control behaviour
+ * @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags)
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
- int type = flags & DISCARD_FL_BARRIER ?
+ int type = flags & BLKDEV_IFL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
struct bio *bio;
struct page *page;
@@ -395,7 +397,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
- if (flags & DISCARD_FL_WAIT)
+ if (flags & BLKDEV_IFL_WAIT)
bio->bi_private = &wait;

/*
@@ -426,7 +428,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_get(bio);
submit_bio(type, bio);

- if (flags & DISCARD_FL_WAIT)
+ if (flags & BLKDEV_IFL_WAIT)
wait_for_completion(&wait);

if (bio_flagged(bio, BIO_EOPNOTSUPP))
diff --git a/block/ioctl.c b/block/ioctl.c
index 8905d2a..e8eb679 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -126,7 +126,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
if (start + len > (bdev->bd_inode->i_size >> 9))
return -EINVAL;
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
- DISCARD_FL_WAIT);
+ BLKDEV_IFL_WAIT);
}

static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e5e86a7..d6f1ae3 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -2251,7 +2251,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
if (test_bit(MD_NO_BARRIER, &mdev->flags))
return;

- r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (r) {
set_bit(MD_NO_BARRIER, &mdev->flags);
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 3f096e7..c786023 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -946,7 +946,8 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
int rv;

if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
- rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
+ rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
if (rv) {
dev_err(DEV, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable.
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea8385e..dd76930 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -413,7 +413,8 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
if (error)
return error;

- error = blkdev_issue_flush(bdev, NULL);
+ error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL,
+ (BLKDEV_IFL_WAIT));
if (error == -EOPNOTSUPP)
error = 0;
return error;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b34d32f..c6a4f45 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 8209f26..9492f60 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -91,7 +91,8 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
* storage
*/
if (test_opt(inode->i_sb, BARRIER))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
out:
return ret;
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0d0c323..ef3d980 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (ext4_should_writeback_data(inode) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
return ret;
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 503b842..bf011dc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
if ((start + nr_sects) != blk) {
rv = blkdev_issue_discard(bdev, start,
nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT |
+ BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
nr_sects = 0;
@@ -869,7 +870,7 @@ start_new_extent:
}
if (nr_sects) {
rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 30beb11..076d1cc 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
*/
if ((journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 671da7f..75716d3 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -717,7 +717,8 @@ start_journal_io:
if (commit_transaction->t_flushed_data_blocks &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);

/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -727,7 +728,8 @@ start_journal_io:
if (err)
__jbd2_journal_abort_hard(journal);
if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_dev, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}

err = journal_finish_inode_data_buffers(journal, commit_transaction);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1d9c127..9977df9 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp,
barrier_done = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b4..2b177c7 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -725,7 +725,8 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}

STATIC void
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5cf17a4..59b9aed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -998,12 +998,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
return NULL;
return bqt->tag_index[tag];
}
-
-extern int blkdev_issue_flush(struct block_device *, sector_t *);
-#define DISCARD_FL_WAIT 0x01 /* wait for completion */
-#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */
-extern int blkdev_issue_discard(struct block_device *, sector_t sector,
- sector_t nr_sects, gfp_t, int flags);
+enum{
+ BLKDEV_WAIT, /* wait for completion */
+ BLKDEV_BARRIER, /*issue request with barrier */
+};
+#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
+#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
+extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
+ unsigned long);
+extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);

static inline int sb_issue_discard(struct super_block *sb,
sector_t block, sector_t nr_blocks)
@@ -1011,7 +1015,7 @@ static inline int sb_issue_discard(struct super_block *sb,
block <<= (sb->s_blocksize_bits - 9);
nr_blocks <<= (sb->s_blocksize_bits - 9);
return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}

extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6cd0a8f..eb086e0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -139,7 +139,8 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
if (nr_blocks) {
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+ nr_blocks, GFP_KERNEL,
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (err)
return err;
cond_resched();
@@ -150,7 +151,8 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+ nr_blocks, GFP_KERNEL,
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (err)
break;

@@ -189,7 +191,8 @@ static void discard_swap_cluster(struct swap_info_struct *si,
start_block <<= PAGE_SHIFT - 9;
nr_blocks <<= PAGE_SHIFT - 9;
if (blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_NOIO, DISCARD_FL_BARRIER))
+ nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT |
+ BLKDEV_IFL_BARRIER))
break;
}

--
1.6.6.1

2010-04-28 13:55:25

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 3/4] blkdev: move blkdev_issue helper functions to separate file

Move blkdev_issue_discard from blk-barrier.c because it is
not barrier related.
Later the file will be populated by other helpers.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/Makefile | 2 +-
block/blk-barrier.c | 104 ----------------------------------------------
block/blk-lib.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+), 105 deletions(-)
create mode 100644 block/blk-lib.c

diff --git a/block/Makefile b/block/Makefile
index cb2d515..0bb499a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
+ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o

obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f11eec9..0d710c9 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -347,107 +347,3 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
-
-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
- if (err) {
- if (err == -EOPNOTSUPP)
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- }
-
- if (bio->bi_private)
- complete(bio->bi_private);
- __free_page(bio_page(bio));
-
- bio_put(bio);
-}
-
-/**
- * blkdev_issue_discard - queue a discard
- * @bdev: blockdev to issue discard for
- * @sector: start sector
- * @nr_sects: number of sectors to discard
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- * Issue a discard request for the sectors in question.
- */
-int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- struct request_queue *q = bdev_get_queue(bdev);
- int type = flags & BLKDEV_IFL_BARRIER ?
- DISCARD_BARRIER : DISCARD_NOBARRIER;
- struct bio *bio;
- struct page *page;
- int ret = 0;
-
- if (!q)
- return -ENXIO;
-
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
-
- while (nr_sects && !ret) {
- unsigned int sector_size = q->limits.logical_block_size;
- unsigned int max_discard_sectors =
- min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-
- bio = bio_alloc(gfp_mask, 1);
- if (!bio)
- goto out;
- bio->bi_sector = sector;
- bio->bi_end_io = blkdev_discard_end_io;
- bio->bi_bdev = bdev;
- if (flags & BLKDEV_IFL_WAIT)
- bio->bi_private = &wait;
-
- /*
- * Add a zeroed one-sector payload as that's what
- * our current implementations need. If we'll ever need
- * more the interface will need revisiting.
- */
- page = alloc_page(gfp_mask | __GFP_ZERO);
- if (!page)
- goto out_free_bio;
- if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
- goto out_free_page;
-
- /*
- * And override the bio size - the way discard works we
- * touch many more blocks on disk than the actual payload
- * length.
- */
- if (nr_sects > max_discard_sectors) {
- bio->bi_size = max_discard_sectors << 9;
- nr_sects -= max_discard_sectors;
- sector += max_discard_sectors;
- } else {
- bio->bi_size = nr_sects << 9;
- nr_sects = 0;
- }
-
- bio_get(bio);
- submit_bio(type, bio);
-
- if (flags & BLKDEV_IFL_WAIT)
- wait_for_completion(&wait);
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
- else if (!bio_flagged(bio, BIO_UPTODATE))
- ret = -EIO;
- bio_put(bio);
- }
- return ret;
-out_free_page:
- __free_page(page);
-out_free_bio:
- bio_put(bio);
-out:
- return -ENOMEM;
-}
-EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-lib.c b/block/blk-lib.c
new file mode 100644
index 0000000..0dc4388
--- /dev/null
+++ b/block/blk-lib.c
@@ -0,0 +1,114 @@
+/*
+ * Functions related to generic helpers functions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ }
+
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ __free_page(bio_page(bio));
+
+ bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev: blockdev to issue discard for
+ * @sector: start sector
+ * @nr_sects: number of sectors to discard
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ * Issue a discard request for the sectors in question.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q = bdev_get_queue(bdev);
+ int type = flags & BLKDEV_IFL_BARRIER ?
+ DISCARD_BARRIER : DISCARD_NOBARRIER;
+ struct bio *bio;
+ struct page *page;
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ while (nr_sects && !ret) {
+ unsigned int sector_size = q->limits.logical_block_size;
+ unsigned int max_discard_sectors =
+ min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ goto out;
+ bio->bi_sector = sector;
+ bio->bi_end_io = blkdev_discard_end_io;
+ bio->bi_bdev = bdev;
+ if (flags & BLKDEV_IFL_WAIT)
+ bio->bi_private = &wait;
+
+ /*
+ * Add a zeroed one-sector payload as that's what
+ * our current implementations need. If we'll ever need
+ * more the interface will need revisiting.
+ */
+ page = alloc_page(gfp_mask | __GFP_ZERO);
+ if (!page)
+ goto out_free_bio;
+ if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+ goto out_free_page;
+
+ /*
+ * And override the bio size - the way discard works we
+ * touch many more blocks on disk than the actual payload
+ * length.
+ */
+ if (nr_sects > max_discard_sectors) {
+ bio->bi_size = max_discard_sectors << 9;
+ nr_sects -= max_discard_sectors;
+ sector += max_discard_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+
+ bio_get(bio);
+ submit_bio(type, bio);
+
+ if (flags & BLKDEV_IFL_WAIT)
+ wait_for_completion(&wait);
+
+ if (bio_flagged(bio, BIO_EOPNOTSUPP))
+ ret = -EOPNOTSUPP;
+ else if (!bio_flagged(bio, BIO_UPTODATE))
+ ret = -EIO;
+ bio_put(bio);
+ }
+ return ret;
+out_free_page:
+ __free_page(page);
+out_free_bio:
+ bio_put(bio);
+out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
--
1.6.6.1

2010-04-28 13:55:20

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v3

- Convert all blkdev_issue_xxx function to common set of flags
- move common helper functions to block/blk-lib.c
- Add generic zeroout helper

Tested configuration: ext4 w -odiscard over raw_bdisk and dm-linear
changes from V2
Fix flag names, and rearange patches according to Christoph's comments.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
Makefile | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index fa1db90..285a0fa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 34
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc5-
NAME = Sheep on Meth

# *DOCUMENTATION*
--
1.6.6.1

2010-04-28 13:55:47

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 2/4] blkdev: allow async blkdev_issue_flush requests

In some places caller don't want to wait a request to complete.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-barrier.c | 33 +++++++++++++++++++--------------
1 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index cf14311..f11eec9 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -286,8 +286,9 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
-
- complete(bio->bi_private);
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ bio_put(bio);
}

/**
@@ -300,7 +301,8 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
- * wish to.
+ * wish to. If WAIT flag is not passed then caller may check only what
+ * request was pushed in some internal queue for later handling.
*/
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
sector_t *error_sector, unsigned long flags)
@@ -319,19 +321,22 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,

bio = bio_alloc(gfp_mask, 0);
bio->bi_end_io = bio_end_empty_barrier;
- bio->bi_private = &wait;
bio->bi_bdev = bdev;
- submit_bio(WRITE_BARRIER, bio);
-
- wait_for_completion(&wait);
+ if (test_bit(BLKDEV_WAIT, &flags))
+ bio->bi_private = &wait;

- /*
- * The driver must store the error location in ->bi_sector, if
- * it supports it. For non-stacked drivers, this should be copied
- * from blk_rq_pos(rq).
- */
- if (error_sector)
- *error_sector = bio->bi_sector;
+ bio_get(bio);
+ submit_bio(WRITE_BARRIER, bio);
+ if (test_bit(BLKDEV_WAIT, &flags)) {
+ wait_for_completion(&wait);
+ /*
+ * The driver must store the error location in ->bi_sector, if
+ * it supports it. For non-stacked drivers, this should be
+ * copied from blk_rq_pos(rq).
+ */
+ if (error_sector)
+ *error_sector = bio->bi_sector;
+ }

if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
--
1.6.6.1

2010-04-28 18:17:46

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/4] blkdev: blkdev_issue_fn cleanups v3

On Wed, Apr 28 2010, Dmitry Monakhov wrote:
> - Convert all blkdev_issue_xxx function to common set of flags
> - move common helper functions to block/blk-lib.c
> - Add generic zeroout helper
>
> Tested configuration: ext4 w -odiscard over raw_bdisk and dm-linear
> changes from V2
> Fix flag names, and rearange patches according to Christoph's comments.

Thanks, I've applied the series.

--
Jens Axboe

2010-04-28 13:55:23

by Dmitry Monakhov

[permalink] [raw]
Subject: [PATCH 4/4] blkdev: add blkdev_issue_zeroout helper function

- Add bio_batch helper primitive. This is rather generic primitive
for submitting/waiting a complex request which consists of several
bios.
- blkdev_issue_zeroout() generate number of zero filed write bios.

Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/blk-lib.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 3 +-
2 files changed, 120 insertions(+), 1 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 0dc4388..886c3f9 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -112,3 +112,121 @@ out:
return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
+
+struct bio_batch
+{
+ atomic_t done;
+ unsigned long flags;
+ struct completion *wait;
+ bio_end_io_t *end_io;
+};
+
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+ struct bio_batch *bb = bio->bi_private;
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bb->flags);
+ else
+ clear_bit(BIO_UPTODATE, &bb->flags);
+ }
+ if (bb) {
+ if (bb->end_io)
+ bb->end_io(bio, err);
+ atomic_inc(&bb->done);
+ complete(bb->wait);
+ }
+ bio_put(bio);
+}
+
+/**
+ * blkdev_issue_zeroout generate number of zero filed write bios
+ * @bdev: blockdev to issue
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ * Generate and issue number of bios with zerofiled pages.
+ * Send barrier at the beginning and at the end if requested. This guarantie
+ * correct request ordering. Empty barrier allow us to avoid post queue flush.
+ */
+
+int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+ int ret = 0;
+ struct bio *bio;
+ struct bio_batch bb;
+ unsigned int sz, issued = 0;
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ atomic_set(&bb.done, 0);
+ bb.flags = 1 << BIO_UPTODATE;
+ bb.wait = &wait;
+ bb.end_io = NULL;
+
+ if (flags & BLKDEV_IFL_BARRIER) {
+ /* issue async barrier before the data */
+ ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
+ if (ret)
+ return ret;
+ }
+submit:
+ while (nr_sects != 0) {
+ bio = bio_alloc(gfp_mask,
+ min(nr_sects, (sector_t)BIO_MAX_PAGES));
+ if (!bio)
+ break;
+
+ bio->bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = bio_batch_end_io;
+ if (flags & BLKDEV_IFL_WAIT)
+ bio->bi_private = &bb;
+
+ while(nr_sects != 0) {
+ sz = min(PAGE_SIZE >> 9 , nr_sects);
+ if (sz == 0)
+ /* bio has maximum size possible */
+ break;
+ ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+ nr_sects -= ret >> 9;
+ sector += ret >> 9;
+ if (ret < (sz << 9))
+ break;
+ }
+ issued++;
+ submit_bio(WRITE, bio);
+ }
+ /*
+ * When all data bios are in flight. Send final barrier if requeted.
+ */
+ if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
+ ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
+ flags & BLKDEV_IFL_WAIT);
+
+
+ if (flags & BLKDEV_IFL_WAIT)
+ /* Wait for bios in-flight */
+ while ( issued != atomic_read(&bb.done))
+ wait_for_completion(&wait);
+
+ if (!test_bit(BIO_UPTODATE, &bb.flags))
+ /* One of bios in the batch was completed with error.*/
+ ret = -EIO;
+
+ if (ret)
+ goto out;
+
+ if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ if (nr_sects != 0)
+ goto submit;
+out:
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 59b9aed..3ac2bd2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1008,7 +1008,8 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
unsigned long);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-
+extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
static inline int sb_issue_discard(struct super_block *sb,
sector_t block, sector_t nr_blocks)
{
--
1.6.6.1