2023-06-28 08:33:19

by Nitesh Shetty

Subject: [PATCH v13 3/9] block: add emulation for copy

For devices that do not support copy, copy emulation is added.
It is required for in-kernel users such as fabrics, where a file descriptor is
not available and hence copy_file_range cannot be used.
Copy emulation is implemented by reading from the source into memory and
writing to the corresponding destination asynchronously.
Emulation is also used if copy offload fails or completes only partially.

Signed-off-by: Nitesh Shetty <[email protected]>
Signed-off-by: Vincent Fu <[email protected]>
Signed-off-by: Anuj Gupta <[email protected]>
---
block/blk-lib.c | 183 +++++++++++++++++++++++++++++++++++++-
block/blk-map.c | 4 +-
include/linux/blk_types.h | 5 ++
include/linux/blkdev.h | 3 +
4 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 10c3eadd5bf6..09e0d5d51d03 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -234,6 +234,180 @@ static ssize_t __blkdev_copy_offload(
return blkdev_copy_wait_io_completion(cio);
}

+static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
+ gfp_t gfp_mask)
+{
+ int min_size = PAGE_SIZE;
+ void *buf;
+
+ while (req_size >= min_size) {
+ buf = kvmalloc(req_size, gfp_mask);
+ if (buf) {
+ *alloc_size = req_size;
+ return buf;
+ }
+ /* retry half the requested size */
+ req_size >>= 1;
+ }
+
+ return NULL;
+}
+
+static void blkdev_copy_emulate_write_endio(struct bio *bio)
+{
+ struct copy_ctx *ctx = bio->bi_private;
+ struct cio *cio = ctx->cio;
+ sector_t clen;
+
+ if (bio->bi_status) {
+ clen = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - cio->pos_out;
+ cio->comp_len = min_t(sector_t, clen, cio->comp_len);
+ }
+ kfree(bvec_virt(&bio->bi_io_vec[0]));
+ bio_map_kern_endio(bio);
+ kfree(ctx);
+ if (atomic_dec_and_test(&cio->refcount)) {
+ if (cio->endio) {
+ cio->endio(cio->private, cio->comp_len);
+ kfree(cio);
+ } else
+ blk_wake_io_task(cio->waiter);
+ }
+}
+
+static void blkdev_copy_emulate_read_endio(struct bio *read_bio)
+{
+ struct copy_ctx *ctx = read_bio->bi_private;
+ struct cio *cio = ctx->cio;
+ sector_t clen;
+
+ if (read_bio->bi_status) {
+ clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+ cio->pos_in;
+ cio->comp_len = min_t(sector_t, clen, cio->comp_len);
+ kfree(bvec_virt(&read_bio->bi_io_vec[0]));
+ bio_map_kern_endio(read_bio);
+ kfree(ctx);
+
+ if (atomic_dec_and_test(&cio->refcount)) {
+ if (cio->endio) {
+ cio->endio(cio->private, cio->comp_len);
+ kfree(cio);
+ } else
+ blk_wake_io_task(cio->waiter);
+ }
+ }
+ schedule_work(&ctx->dispatch_work);
+ kfree(read_bio);
+}
+
+static void blkdev_copy_dispatch_work(struct work_struct *work)
+{
+ struct copy_ctx *ctx = container_of(work, struct copy_ctx,
+ dispatch_work);
+
+ submit_bio(ctx->write_bio);
+}
+
+/*
+ * If native copy offload feature is absent, this function tries to emulate,
+ * by copying data from source to a temporary buffer and from buffer to
+ * destination device.
+ * Returns the length of bytes copied or error if encountered
+ */
+static ssize_t __blkdev_copy_emulate(
+ struct block_device *bdev_in, loff_t pos_in,
+ struct block_device *bdev_out, loff_t pos_out,
+ size_t len, cio_iodone_t endio, void *private, gfp_t gfp_mask)
+{
+ struct request_queue *in = bdev_get_queue(bdev_in);
+ struct request_queue *out = bdev_get_queue(bdev_out);
+ struct bio *read_bio, *write_bio;
+ void *buf = NULL;
+ struct copy_ctx *ctx;
+ struct cio *cio;
+ sector_t buf_len, req_len, rem = 0;
+ sector_t max_src_hw_len = min_t(unsigned int,
+ queue_max_hw_sectors(in),
+ queue_max_segments(in) << (PAGE_SHIFT - SECTOR_SHIFT))
+ << SECTOR_SHIFT;
+ sector_t max_dst_hw_len = min_t(unsigned int,
+ queue_max_hw_sectors(out),
+ queue_max_segments(out) << (PAGE_SHIFT - SECTOR_SHIFT))
+ << SECTOR_SHIFT;
+ sector_t max_hw_len = min_t(unsigned int,
+ max_src_hw_len, max_dst_hw_len);
+
+ cio = kzalloc(sizeof(struct cio), GFP_KERNEL);
+ if (!cio)
+ return -ENOMEM;
+ atomic_set(&cio->refcount, 0);
+ cio->pos_in = pos_in;
+ cio->pos_out = pos_out;
+ cio->waiter = current;
+ cio->endio = endio;
+ cio->private = private;
+
+ for (rem = len; rem > 0; rem -= buf_len) {
+ req_len = min_t(int, max_hw_len, rem);
+
+ buf = blkdev_copy_alloc_buf(req_len, &buf_len, gfp_mask);
+ if (!buf)
+ goto err_alloc_buf;
+
+ ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
+ if (!ctx)
+ goto err_ctx;
+
+ read_bio = bio_map_kern(in, buf, buf_len, gfp_mask);
+ if (IS_ERR(read_bio))
+ goto err_read_bio;
+
+ write_bio = bio_map_kern(out, buf, buf_len, gfp_mask);
+ if (IS_ERR(write_bio))
+ goto err_write_bio;
+
+ ctx->cio = cio;
+ ctx->write_bio = write_bio;
+ INIT_WORK(&ctx->dispatch_work, blkdev_copy_dispatch_work);
+
+ read_bio->bi_iter.bi_sector = pos_in >> SECTOR_SHIFT;
+ read_bio->bi_iter.bi_size = buf_len;
+ read_bio->bi_opf = REQ_OP_READ | REQ_SYNC;
+ bio_set_dev(read_bio, bdev_in);
+ read_bio->bi_end_io = blkdev_copy_emulate_read_endio;
+ read_bio->bi_private = ctx;
+
+ write_bio->bi_iter.bi_size = buf_len;
+ write_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+ bio_set_dev(write_bio, bdev_out);
+ write_bio->bi_end_io = blkdev_copy_emulate_write_endio;
+ write_bio->bi_iter.bi_sector = pos_out >> SECTOR_SHIFT;
+ write_bio->bi_private = ctx;
+
+ atomic_inc(&cio->refcount);
+ submit_bio(read_bio);
+
+ pos_in += buf_len;
+ pos_out += buf_len;
+ }
+ return blkdev_copy_wait_io_completion(cio);
+
+err_write_bio:
+ bio_put(read_bio);
+err_read_bio:
+ kfree(ctx);
+err_ctx:
+ kvfree(buf);
+err_alloc_buf:
+ cio->comp_len -= min_t(sector_t, cio->comp_len, len - rem);
+ if (!atomic_read(&cio->refcount)) {
+ kfree(cio);
+ return -ENOMEM;
+ }
+ return blkdev_copy_wait_io_completion(cio);
+}
+
static inline ssize_t blkdev_copy_sanity_check(
struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out,
@@ -284,9 +458,16 @@ ssize_t blkdev_copy_offload(
if (ret)
return ret;

- if (blk_queue_copy(q_in) && blk_queue_copy(q_out))
+ if (blk_queue_copy(q_in) && blk_queue_copy(q_out)) {
ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
len, endio, private, gfp_mask);
+ if (ret < 0)
+ ret = 0;
+ }
+
+ if (ret != len)
+ ret = __blkdev_copy_emulate(bdev_in, pos_in + ret, bdev_out,
+ pos_out + ret, len - ret, endio, private, gfp_mask);

return ret;
}
diff --git a/block/blk-map.c b/block/blk-map.c
index 44d74a30ddac..ceeb70a95fd1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -363,7 +363,7 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
#endif
}

-static void bio_map_kern_endio(struct bio *bio)
+void bio_map_kern_endio(struct bio *bio)
{
bio_invalidate_vmalloc_pages(bio);
bio_uninit(bio);
@@ -380,7 +380,7 @@ static void bio_map_kern_endio(struct bio *bio)
* Map the kernel address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
-static struct bio *bio_map_kern(struct request_queue *q, void *data,
+struct bio *bio_map_kern(struct request_queue *q, void *data,
unsigned int len, gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 336146798e56..f8c80940c7ad 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -562,4 +562,9 @@ struct cio {
atomic_t refcount;
};

+struct copy_ctx {
+ struct cio *cio;
+ struct work_struct dispatch_work;
+ struct bio *write_bio;
+};
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 963f5c97dec0..c176bf6173c5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1047,6 +1047,9 @@ ssize_t blkdev_copy_offload(
struct block_device *bdev_in, loff_t pos_in,
struct block_device *bdev_out, loff_t pos_out,
size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+ gfp_t gfp_mask);
+void bio_map_kern_endio(struct bio *bio);

#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
--
2.35.1.500.gb896f729e2



2023-06-28 08:41:53

by Damien Le Moal

Subject: Re: [PATCH v13 3/9] block: add emulation for copy

On 6/28/23 03:36, Nitesh Shetty wrote:
> For devices that do not support copy, copy emulation is added.
> It is required for in-kernel users such as fabrics, where a file descriptor is
> not available and hence copy_file_range cannot be used.
> Copy emulation is implemented by reading from the source into memory and
> writing to the corresponding destination asynchronously.
> Emulation is also used if copy offload fails or completes only partially.
>
> Signed-off-by: Nitesh Shetty <[email protected]>
> Signed-off-by: Vincent Fu <[email protected]>
> Signed-off-by: Anuj Gupta <[email protected]>
> ---
> block/blk-lib.c | 183 +++++++++++++++++++++++++++++++++++++-
> block/blk-map.c | 4 +-
> include/linux/blk_types.h | 5 ++
> include/linux/blkdev.h | 3 +
> 4 files changed, 192 insertions(+), 3 deletions(-)
>
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index 10c3eadd5bf6..09e0d5d51d03 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -234,6 +234,180 @@ static ssize_t __blkdev_copy_offload(
> return blkdev_copy_wait_io_completion(cio);
> }
>
> +static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
> + gfp_t gfp_mask)
> +{
> + int min_size = PAGE_SIZE;
> + void *buf;
> +
> + while (req_size >= min_size) {
> + buf = kvmalloc(req_size, gfp_mask);
> + if (buf) {
> + *alloc_size = req_size;
> + return buf;
> + }
> + /* retry half the requested size */

Kind of obvious :)

> + req_size >>= 1;
> + }
> +
> + return NULL;
> +}
> +
> +static void blkdev_copy_emulate_write_endio(struct bio *bio)
> +{
> + struct copy_ctx *ctx = bio->bi_private;
> + struct cio *cio = ctx->cio;
> + sector_t clen;
> +
> + if (bio->bi_status) {
> + clen = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - cio->pos_out;
> + cio->comp_len = min_t(sector_t, clen, cio->comp_len);
> + }
> + kfree(bvec_virt(&bio->bi_io_vec[0]));
> + bio_map_kern_endio(bio);
> + kfree(ctx);
> + if (atomic_dec_and_test(&cio->refcount)) {
> + if (cio->endio) {
> + cio->endio(cio->private, cio->comp_len);
> + kfree(cio);
> + } else
> + blk_wake_io_task(cio->waiter);

Curly brackets.
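I.e. kernel style wants braces on both branches once one of them needs
them; a minimal sketch of the expected form, reusing the code above:

	if (atomic_dec_and_test(&cio->refcount)) {
		if (cio->endio) {
			cio->endio(cio->private, cio->comp_len);
			kfree(cio);
		} else {
			blk_wake_io_task(cio->waiter);
		}
	}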

> + }
> +}
> +
> +static void blkdev_copy_emulate_read_endio(struct bio *read_bio)
> +{
> + struct copy_ctx *ctx = read_bio->bi_private;
> + struct cio *cio = ctx->cio;
> + sector_t clen;
> +
> + if (read_bio->bi_status) {
> + clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) -
> + cio->pos_in;
> + cio->comp_len = min_t(sector_t, clen, cio->comp_len);
> + kfree(bvec_virt(&read_bio->bi_io_vec[0]));
> + bio_map_kern_endio(read_bio);
> + kfree(ctx);
> +
> + if (atomic_dec_and_test(&cio->refcount)) {
> + if (cio->endio) {
> + cio->endio(cio->private, cio->comp_len);
> + kfree(cio);
> + } else
> + blk_wake_io_task(cio->waiter);

Same.

> + }
> + }
> + schedule_work(&ctx->dispatch_work);

ctx may have been freed above.
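One way to avoid the use-after-free is to return from the error branch
before schedule_work() is reached. A rough sketch that keeps the rest of
the handler as posted (only the early return is new, and it only
addresses the dereference-after-free noted here):

static void blkdev_copy_emulate_read_endio(struct bio *read_bio)
{
	struct copy_ctx *ctx = read_bio->bi_private;
	struct cio *cio = ctx->cio;
	sector_t clen;

	if (read_bio->bi_status) {
		clen = (read_bio->bi_iter.bi_sector << SECTOR_SHIFT) -
			cio->pos_in;
		cio->comp_len = min_t(sector_t, clen, cio->comp_len);
		kfree(bvec_virt(&read_bio->bi_io_vec[0]));
		bio_map_kern_endio(read_bio);
		kfree(ctx);
		if (atomic_dec_and_test(&cio->refcount)) {
			if (cio->endio) {
				cio->endio(cio->private, cio->comp_len);
				kfree(cio);
			} else {
				blk_wake_io_task(cio->waiter);
			}
		}
		/* ctx and read_bio are already freed at this point */
		return;
	}
	schedule_work(&ctx->dispatch_work);
	kfree(read_bio);
}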

> + kfree(read_bio);
> +}
> +
> +static void blkdev_copy_dispatch_work(struct work_struct *work)
> +{
> + struct copy_ctx *ctx = container_of(work, struct copy_ctx,
> + dispatch_work);

Please align the argument, or even better: split the line after "=".
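E.g.:

	struct copy_ctx *ctx =
		container_of(work, struct copy_ctx, dispatch_work);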

> +
> + submit_bio(ctx->write_bio);
> +}
> +
> +/*
> + * If native copy offload feature is absent, this function tries to emulate,
> + * by copying data from source to a temporary buffer and from buffer to
> + * destination device.
> + * Returns the length of bytes copied or error if encountered
> + */
> +static ssize_t __blkdev_copy_emulate(
> + struct block_device *bdev_in, loff_t pos_in,
> + struct block_device *bdev_out, loff_t pos_out,
> + size_t len, cio_iodone_t endio, void *private, gfp_t gfp_mask)
> +{
> + struct request_queue *in = bdev_get_queue(bdev_in);
> + struct request_queue *out = bdev_get_queue(bdev_out);
> + struct bio *read_bio, *write_bio;
> + void *buf = NULL;
> + struct copy_ctx *ctx;
> + struct cio *cio;
> + sector_t buf_len, req_len, rem = 0;
> + sector_t max_src_hw_len = min_t(unsigned int,
> + queue_max_hw_sectors(in),
> + queue_max_segments(in) << (PAGE_SHIFT - SECTOR_SHIFT))
> + << SECTOR_SHIFT;
> + sector_t max_dst_hw_len = min_t(unsigned int,
> + queue_max_hw_sectors(out),
> + queue_max_segments(out) << (PAGE_SHIFT - SECTOR_SHIFT))
> + << SECTOR_SHIFT;
> + sector_t max_hw_len = min_t(unsigned int,
> + max_src_hw_len, max_dst_hw_len);
> +
> + cio = kzalloc(sizeof(struct cio), GFP_KERNEL);
> + if (!cio)
> + return -ENOMEM;
> + atomic_set(&cio->refcount, 0);
> + cio->pos_in = pos_in;
> + cio->pos_out = pos_out;
> + cio->waiter = current;
> + cio->endio = endio;
> + cio->private = private;
> +
> + for (rem = len; rem > 0; rem -= buf_len) {
> + req_len = min_t(int, max_hw_len, rem);
> +
> + buf = blkdev_copy_alloc_buf(req_len, &buf_len, gfp_mask);
> + if (!buf)
> + goto err_alloc_buf;
> +
> + ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
> + if (!ctx)
> + goto err_ctx;
> +
> + read_bio = bio_map_kern(in, buf, buf_len, gfp_mask);
> + if (IS_ERR(read_bio))
> + goto err_read_bio;
> +
> + write_bio = bio_map_kern(out, buf, buf_len, gfp_mask);
> + if (IS_ERR(write_bio))
> + goto err_write_bio;
> +
> + ctx->cio = cio;
> + ctx->write_bio = write_bio;
> + INIT_WORK(&ctx->dispatch_work, blkdev_copy_dispatch_work);
> +
> + read_bio->bi_iter.bi_sector = pos_in >> SECTOR_SHIFT;
> + read_bio->bi_iter.bi_size = buf_len;
> + read_bio->bi_opf = REQ_OP_READ | REQ_SYNC;
> + bio_set_dev(read_bio, bdev_in);
> + read_bio->bi_end_io = blkdev_copy_emulate_read_endio;
> + read_bio->bi_private = ctx;
> +
> + write_bio->bi_iter.bi_size = buf_len;
> + write_bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
> + bio_set_dev(write_bio, bdev_out);
> + write_bio->bi_end_io = blkdev_copy_emulate_write_endio;
> + write_bio->bi_iter.bi_sector = pos_out >> SECTOR_SHIFT;
> + write_bio->bi_private = ctx;
> +
> + atomic_inc(&cio->refcount);
> + submit_bio(read_bio);
> +
> + pos_in += buf_len;
> + pos_out += buf_len;
> + }
> + return blkdev_copy_wait_io_completion(cio);
> +
> +err_write_bio:
> + bio_put(read_bio);
> +err_read_bio:
> + kfree(ctx);
> +err_ctx:
> + kvfree(buf);
> +err_alloc_buf:
> + cio->comp_len -= min_t(sector_t, cio->comp_len, len - rem);
> + if (!atomic_read(&cio->refcount)) {
> + kfree(cio);
> + return -ENOMEM;
> + }
> + return blkdev_copy_wait_io_completion(cio);
> +}
> +
> static inline ssize_t blkdev_copy_sanity_check(
> struct block_device *bdev_in, loff_t pos_in,
> struct block_device *bdev_out, loff_t pos_out,
> @@ -284,9 +458,16 @@ ssize_t blkdev_copy_offload(
> if (ret)
> return ret;
>
> - if (blk_queue_copy(q_in) && blk_queue_copy(q_out))
> + if (blk_queue_copy(q_in) && blk_queue_copy(q_out)) {
> ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out,
> len, endio, private, gfp_mask);
> + if (ret < 0)
> + ret = 0;
> + }
> +
> + if (ret != len)
> + ret = __blkdev_copy_emulate(bdev_in, pos_in + ret, bdev_out,
> + pos_out + ret, len - ret, endio, private, gfp_mask);
>
> return ret;
> }
> diff --git a/block/blk-map.c b/block/blk-map.c
> index 44d74a30ddac..ceeb70a95fd1 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -363,7 +363,7 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
> #endif
> }
>
> -static void bio_map_kern_endio(struct bio *bio)
> +void bio_map_kern_endio(struct bio *bio)
> {
> bio_invalidate_vmalloc_pages(bio);
> bio_uninit(bio);
> @@ -380,7 +380,7 @@ static void bio_map_kern_endio(struct bio *bio)
> * Map the kernel address into a bio suitable for io to a block
> * device. Returns an error pointer in case of error.
> */
> -static struct bio *bio_map_kern(struct request_queue *q, void *data,
> +struct bio *bio_map_kern(struct request_queue *q, void *data,
> unsigned int len, gfp_t gfp_mask)
> {
> unsigned long kaddr = (unsigned long)data;
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 336146798e56..f8c80940c7ad 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -562,4 +562,9 @@ struct cio {
> atomic_t refcount;
> };
>
> +struct copy_ctx {
> + struct cio *cio;
> + struct work_struct dispatch_work;
> + struct bio *write_bio;
> +};
> #endif /* __LINUX_BLK_TYPES_H */
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 963f5c97dec0..c176bf6173c5 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1047,6 +1047,9 @@ ssize_t blkdev_copy_offload(
> struct block_device *bdev_in, loff_t pos_in,
> struct block_device *bdev_out, loff_t pos_out,
> size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask);
> +struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
> + gfp_t gfp_mask);
> +void bio_map_kern_endio(struct bio *bio);
>
> #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
> #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */

--
Damien Le Moal
Western Digital Research


2023-06-29 09:13:04

by Ming Lei

Subject: Re: [PATCH v13 3/9] block: add emulation for copy

Hi Nitesh,

On Wed, Jun 28, 2023 at 12:06:17AM +0530, Nitesh Shetty wrote:
> For devices that do not support copy, copy emulation is added.
> It is required for in-kernel users such as fabrics, where a file descriptor is

I can understand that the copy command helps FS GC and fabrics storage,
but it is still not very clear why copy emulation is needed for in-kernel
users. Is it just to cover both the copy command and emulation behind a
single interface, or are there other purposes?

I'd suggest adding more words about the in-kernel users of copy emulation.

> not available and hence copy_file_range cannot be used.
> Copy emulation is implemented by reading from the source into memory and
> writing to the corresponding destination asynchronously.
> Emulation is also used if copy offload fails or completes only partially.

Per my understanding, this kind of emulation may not be as efficient as
doing it in userspace (two linked io_uring SQEs, read & write with a
shared buffer). But it is fine if there are real in-kernel users.
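
For reference, the userspace variant would look roughly like the sketch
below (liburing, error handling trimmed; the ring is assumed to have been
set up with io_uring_queue_init() elsewhere, and the fd/offset/length
values are placeholders):

#include <liburing.h>

/* copy one chunk: linked read + write sharing the same buffer */
static int copy_chunk(struct io_uring *ring, int src_fd, int dst_fd,
		      void *buf, unsigned int len,
		      off_t src_off, off_t dst_off)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret, err = 0;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_read(sqe, src_fd, buf, len, src_off);
	sqe->flags |= IOSQE_IO_LINK;	/* write runs only if the read succeeds */

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_write(sqe, dst_fd, buf, len, dst_off);

	ret = io_uring_submit(ring);
	if (ret < 0)
		return ret;

	for (i = 0; i < 2; i++) {	/* reap read and write completions */
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0)
			return ret;
		if (cqe->res < 0 && !err)
			err = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	return err;
}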


Thanks,
Ming


2023-07-20 07:52:51

by Christoph Hellwig

Subject: Re: [PATCH v13 3/9] block: add emulation for copy

> +static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
> + gfp_t gfp_mask)
> +{
> + int min_size = PAGE_SIZE;
> + void *buf;
> +
> + while (req_size >= min_size) {
> + buf = kvmalloc(req_size, gfp_mask);
> + if (buf) {
> + *alloc_size = req_size;
> + return buf;
> + }
> + /* retry half the requested size */
> + req_size >>= 1;
> + }
> +
> + return NULL;

Is there any good reason for using vmalloc instead of a bunch
of discontiguous pages?
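
(For comparison, the discontiguous-page version would be along these
lines; a rough sketch, the helper name is made up and not part of the
patch:)

/* allocate nr_pages individual pages instead of one kvmalloc buffer */
static struct page **blkdev_copy_alloc_pages(unsigned int nr_pages,
					     gfp_t gfp_mask)
{
	struct page **pages;
	unsigned int i;

	pages = kcalloc(nr_pages, sizeof(*pages), gfp_mask);
	if (!pages)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(gfp_mask);
		if (!pages[i]) {
			while (i--)
				__free_page(pages[i]);
			kfree(pages);
			return NULL;
		}
	}
	return pages;
}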

> + ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
> + if (!ctx)
> + goto err_ctx;

I suspect it would be better to just allocate a single buffer and
only have a single outstanding copy. That will reduce the bandwidth
you can theoretically get, but copies tend to be background operations
anyway. It will reduce the required memory, and thus the chance for
this operation to fail on a loaded system. It will also dramatically
reduce the effect on memory management.
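
(Structurally that would collapse the submission loop into something
like the following; a sketch only, where the read/write helpers are
hypothetical synchronous wrappers, not existing functions:)

	/* one preallocated buffer, one outstanding copy at a time */
	for (rem = len; rem > 0; rem -= chunk) {
		chunk = min_t(size_t, rem, buf_len);

		/* hypothetical helper: read chunk from bdev_in into buf, wait */
		ret = blkdev_copy_read_buf(bdev_in, pos_in, buf, chunk);
		if (ret)
			break;

		/* hypothetical helper: write chunk from buf to bdev_out, wait */
		ret = blkdev_copy_write_buf(bdev_out, pos_out, buf, chunk);
		if (ret)
			break;

		pos_in += chunk;
		pos_out += chunk;
	}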

> + read_bio = bio_map_kern(in, buf, buf_len, gfp_mask);
> + if (IS_ERR(read_bio))
> + goto err_read_bio;
> +
> + write_bio = bio_map_kern(out, buf, buf_len, gfp_mask);
> + if (IS_ERR(write_bio))
> + goto err_write_bio;

bio_map_kern is only for passthrough I/Os. You need to use
a bio_add_page loop here.
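
A bio_add_page loop would look roughly like this (a sketch; the helper
name is made up, nr_pages must stay within BIO_MAX_VECS per bio, and the
page array would come from an allocation like the one sketched above):

static struct bio *blkdev_copy_map_pages(struct block_device *bdev,
					 struct page **pages,
					 unsigned int nr_pages,
					 sector_t sector, blk_opf_t opf,
					 gfp_t gfp_mask)
{
	struct bio *bio;
	unsigned int i;

	bio = bio_alloc(bdev, nr_pages, opf, gfp_mask);
	if (!bio)
		return NULL;
	bio->bi_iter.bi_sector = sector;

	for (i = 0; i < nr_pages; i++) {
		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) != PAGE_SIZE) {
			bio_put(bio);
			return NULL;
		}
	}
	return bio;
}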

> index 336146798e56..f8c80940c7ad 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -562,4 +562,9 @@ struct cio {
> atomic_t refcount;
> };
>
> +struct copy_ctx {
> + struct cio *cio;
> + struct work_struct dispatch_work;
> + struct bio *write_bio;
> +};

This is misnamed as it's only used by the fallback code, and also
should be private to blk-lib.c.


2023-08-02 07:55:36

by Kent Overstreet

Subject: Re: [PATCH v13 3/9] block: add emulation for copy

On Tue, Aug 01, 2023 at 06:37:02PM +0530, Nitesh Shetty wrote:
> On 23/07/20 09:50AM, Christoph Hellwig wrote:
> > > +static void *blkdev_copy_alloc_buf(sector_t req_size, sector_t *alloc_size,
> > > + gfp_t gfp_mask)
> > > +{
> > > + int min_size = PAGE_SIZE;
> > > + void *buf;
> > > +
> > > + while (req_size >= min_size) {
> > > + buf = kvmalloc(req_size, gfp_mask);
> > > + if (buf) {
> > > + *alloc_size = req_size;
> > > + return buf;
> > > + }
> > > + /* retry half the requested size */
> > > + req_size >>= 1;
> > > + }
> > > +
> > > + return NULL;
> >
> > Is there any good reason for using vmalloc instead of a bunch
> > of distcontiguous pages?
> >
>
> kvmalloc seemed convenient for the purpose. We will need to call alloc_page
> in a loop to guarantee discontiguous pages. Do you prefer that over kvmalloc?

No, kvmalloc should be the preferred approach here now: with large
folios, we're now getting better about doing more large memory
allocations and avoiding fragmentation, so in practice this won't be a
vmalloc allocation except in exceptional circumstances. Performance
will be better and the code will be simpler doing a single large
allocation.