2021-01-09 16:10:23

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v3 0/7] no-copy bvec

Currently, when iomap or block direct IO gets a bvec based iterator,
the bvec will be copied, with all the associated accounting that takes
much CPU time and causes additional allocation for larger bvecs. This
patchset makes it possible to reuse the passed-in iter bvec instead.

Patches [1,2] forbid zero-length bvec segments to avoid piling up
special cases, and [3] skips/fixes PSI tracking so it doesn't iterate
over the bvecs an extra time.


nullblk completion_nsec=0 submit_queues=NR_CORES, no merges, no stats
fio/t/io_uring /dev/nullb0 -d 128 -s 32 -c 32 -p 0 -B 1 -F 1 -b BLOCK_SIZE

BLOCK_SIZE 512 4K 8K 16K 32K 64K
===================================================================
old (KIOPS) 1208 1208 1131 1039 863 699
new (KIOPS) 1222 1222 1170 1137 1083 982

Previously, Jens saw about a 10% difference when polling real HW with
small block sizes, but that was for an older version that had one
fewer iov_iter_advance() call.


since RFC:
- add target_core_file patch by Christoph
- make no-copy default behaviour, remove iter flag
- iter_advance() instead of hacks to revert to work
- add bvec iter_advance() optimisation patch
- remove PSI annotations from direct IO (iomap, block and fs/direct)
- note in d/f/porting

since v1:
- don't allow zero-length bvec segments (Ming)
- don't add a BIO_WORKINGSET-less version of bio_add_page(), just clear
the flag at the end and leave it for further cleanups (Christoph)
- commit message and comments rewording (Dave)
- other nits by Christoph

since v2:
- add a comment in 1/7 (Christoph)
- add a note about 0-len bvecs in biovecs.rst (Matthew)

Christoph Hellwig (1):
target/file: allocate the bvec array as part of struct
target_core_file_cmd

Pavel Begunkov (6):
splice: don't generate zero-len segement bvecs
bvec/iter: disallow zero-length segment bvecs
block/psi: remove PSI annotations from direct IO
iov_iter: optimise bvec iov_iter_advance()
bio: add a helper calculating nr segments to alloc
bio: don't copy bvec for direct IO

Documentation/block/biovecs.rst | 2 +
Documentation/filesystems/porting.rst | 16 ++++++
block/bio.c | 71 +++++++++++++--------------
drivers/target/target_core_file.c | 20 +++-----
fs/block_dev.c | 7 +--
fs/direct-io.c | 2 +
fs/iomap/direct-io.c | 9 ++--
fs/splice.c | 9 ++--
include/linux/bio.h | 13 +++++
lib/iov_iter.c | 21 +++++++-
10 files changed, 106 insertions(+), 64 deletions(-)

--
2.24.0


2021-01-09 16:10:42

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v3 6/7] bio: add a helper calculating nr segments to alloc

Add a helper function calculating the number of bvec segments we need to
allocate to construct a bio. It doesn't change anything functionally,
but will be used to not duplicate special cases in the future.

Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/block_dev.c | 7 ++++---
fs/iomap/direct-io.c | 9 ++++-----
include/linux/bio.h | 10 ++++++++++
3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3b8963e228a1..6f5bd9950baf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -416,7 +416,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;

- nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
+ nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES);
if (!nr_pages) {
bool polled = false;

@@ -481,9 +481,10 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
int nr_pages;

- nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
- if (!nr_pages)
+ if (!iov_iter_count(iter))
return 0;
+
+ nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1);
if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 933f234d5bec..ea1e8f696076 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -250,11 +250,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
orig_count = iov_iter_count(dio->submit.iter);
iov_iter_truncate(dio->submit.iter, length);

- nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
- if (nr_pages <= 0) {
- ret = nr_pages;
+ if (!iov_iter_count(dio->submit.iter))
goto out;
- }

if (need_zeroout) {
/* zero out from the start of the block to the write offset */
@@ -263,6 +260,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
iomap_dio_zero(dio, iomap, pos - pad, pad);
}

+ nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES);
do {
size_t n;
if (dio->error) {
@@ -308,7 +306,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
dio->size += n;
copied += n;

- nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
+ nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter,
+ BIO_MAX_PAGES);
iomap_dio_submit_bio(dio, iomap, bio, pos);
pos += n;
} while (nr_pages);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1edda614f7ce..d8f9077c43ef 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -10,6 +10,7 @@
#include <linux/ioprio.h>
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
+#include <linux/uio.h>

#define BIO_DEBUG

@@ -441,6 +442,15 @@ static inline void bio_wouldblock_error(struct bio *bio)
bio_endio(bio);
}

+/*
+ * Calculate number of bvec segments that should be allocated to fit data
+ * pointed by @iter.
+ */
+static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
+{
+ return iov_iter_npages(iter, max_segs);
+}
+
struct request_queue;

extern int submit_bio_wait(struct bio *bio);
--
2.24.0

2021-01-09 16:10:53

by Pavel Begunkov

[permalink] [raw]
Subject: [PATCH v3 7/7] bio: don't copy bvec for direct IO

The block layer spends quite a while in blkdev_direct_IO() to copy and
initialise bio's bvec. However, if we've already got a bvec in the input
iterator it might be reused in some cases, i.e. when the new
ITER_BVEC_FLAG_FIXED flag is set. Simple tests show a considerable
performance boost, and it also reduces memory footprint.

Suggested-by: Matthew Wilcox <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
Documentation/filesystems/porting.rst | 9 ++++
block/bio.c | 67 ++++++++++++---------------
include/linux/bio.h | 5 +-
3 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index c722d94f29ea..1f8cf8e10b34 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -872,3 +872,12 @@ its result is kern_unmount() or kern_unmount_array().

zero-length bvec segments are disallowed, they must be filtered out before
passed on to an iterator.
+
+---
+
+**mandatory**
+
+For bvec based itererators bio_iov_iter_get_pages() now doesn't copy bvecs but
+uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
+page references stay until I/O has completed, i.e. until ->ki_complete() has
+been called or returned with non -EIOCBQUEUED code.
diff --git a/block/bio.c b/block/bio.c
index 9f26984af643..6f031a04b59a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -960,21 +960,17 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
}
EXPORT_SYMBOL_GPL(bio_release_pages);

-static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
- const struct bio_vec *bv = iter->bvec;
- unsigned int len;
- size_t size;
-
- if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
- return -EINVAL;
-
- len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
- size = bio_add_page(bio, bv->bv_page, len,
- bv->bv_offset + iter->iov_offset);
- if (unlikely(size != len))
- return -EINVAL;
- iov_iter_advance(iter, size);
+ WARN_ON_ONCE(BVEC_POOL_IDX(bio) != 0);
+
+ bio->bi_vcnt = iter->nr_segs;
+ bio->bi_max_vecs = iter->nr_segs;
+ bio->bi_io_vec = (struct bio_vec *)iter->bvec;
+ bio->bi_iter.bi_bvec_done = iter->iov_offset;
+ bio->bi_iter.bi_size = iter->count;
+
+ iov_iter_advance(iter, iter->count);
return 0;
}

@@ -1088,12 +1084,12 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
* This takes either an iterator pointing to user memory, or one pointing to
* kernel pages (BVEC iterator). If we're adding user pages, we pin them and
* map them into the kernel. On IO completion, the caller should put those
- * pages. If we're adding kernel pages, and the caller told us it's safe to
- * do so, we just have to add the pages to the bio directly. We don't grab an
- * extra reference to those pages (the user should already have that), and we
- * don't put the page on IO completion. The caller needs to check if the bio is
- * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
- * released.
+ * pages. For bvec based iterators bio_iov_iter_get_pages() uses the provided
+ * bvecs rather than copying them. Hence anyone issuing kiocb based IO needs
+ * to ensure the bvecs and pages stay referenced until the submitted I/O is
+ * completed by a call to ->ki_complete() or returns with an error other than
+ * -EIOCBQUEUED. The caller needs to check if the bio is flagged BIO_NO_PAGE_REF
+ * on IO completion. If it isn't, then pages should be released.
*
* The function tries, but does not guarantee, to pin as many pages as
* fit into the bio, or are requested in @iter, whatever is smaller. If
@@ -1105,27 +1101,22 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
- const bool is_bvec = iov_iter_is_bvec(iter);
- int ret;
-
- if (WARN_ON_ONCE(bio->bi_vcnt))
- return -EINVAL;
+ int ret = 0;

- do {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- if (WARN_ON_ONCE(is_bvec))
- return -EINVAL;
- ret = __bio_iov_append_get_pages(bio, iter);
- } else {
- if (is_bvec)
- ret = __bio_iov_bvec_add_pages(bio, iter);
+ if (iov_iter_is_bvec(iter)) {
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
+ return -EINVAL;
+ bio_iov_bvec_set(bio, iter);
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
+ return 0;
+ } else {
+ do {
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ ret = __bio_iov_append_get_pages(bio, iter);
else
ret = __bio_iov_iter_get_pages(bio, iter);
- }
- } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
-
- if (is_bvec)
- bio_set_flag(bio, BIO_NO_PAGE_REF);
+ } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
+ }

/* don't account direct I/O as memory stall */
bio_clear_flag(bio, BIO_WORKINGSET);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d8f9077c43ef..1d30572a8c53 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -444,10 +444,13 @@ static inline void bio_wouldblock_error(struct bio *bio)

/*
* Calculate number of bvec segments that should be allocated to fit data
- * pointed by @iter.
+ * pointed by @iter. If @iter is backed by bvec it's going to be reused
+ * instead of allocating a new one.
*/
static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
{
+ if (iov_iter_is_bvec(iter))
+ return 0;
return iov_iter_npages(iter, max_segs);
}

--
2.24.0

2021-01-11 03:00:52

by Ming Lei

[permalink] [raw]
Subject: Re: [PATCH v3 6/7] bio: add a helper calculating nr segments to alloc

On Sat, Jan 09, 2021 at 04:03:02PM +0000, Pavel Begunkov wrote:
> Add a helper function calculating the number of bvec segments we need to
> allocate to construct a bio. It doesn't change anything functionally,
> but will be used to not duplicate special cases in the future.
>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> fs/block_dev.c | 7 ++++---
> fs/iomap/direct-io.c | 9 ++++-----
> include/linux/bio.h | 10 ++++++++++
> 3 files changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 3b8963e228a1..6f5bd9950baf 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -416,7 +416,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
> dio->size += bio->bi_iter.bi_size;
> pos += bio->bi_iter.bi_size;
>
> - nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
> + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES);
> if (!nr_pages) {
> bool polled = false;
>
> @@ -481,9 +481,10 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> {
> int nr_pages;
>
> - nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
> - if (!nr_pages)
> + if (!iov_iter_count(iter))
> return 0;
> +
> + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1);
> if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
> return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
>
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 933f234d5bec..ea1e8f696076 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -250,11 +250,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
> orig_count = iov_iter_count(dio->submit.iter);
> iov_iter_truncate(dio->submit.iter, length);
>
> - nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
> - if (nr_pages <= 0) {
> - ret = nr_pages;
> + if (!iov_iter_count(dio->submit.iter))
> goto out;
> - }
>
> if (need_zeroout) {
> /* zero out from the start of the block to the write offset */
> @@ -263,6 +260,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
> iomap_dio_zero(dio, iomap, pos - pad, pad);
> }
>
> + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES);
> do {
> size_t n;
> if (dio->error) {
> @@ -308,7 +306,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
> dio->size += n;
> copied += n;
>
> - nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
> + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter,
> + BIO_MAX_PAGES);
> iomap_dio_submit_bio(dio, iomap, bio, pos);
> pos += n;
> } while (nr_pages);
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 1edda614f7ce..d8f9077c43ef 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -10,6 +10,7 @@
> #include <linux/ioprio.h>
> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
> #include <linux/blk_types.h>
> +#include <linux/uio.h>
>
> #define BIO_DEBUG
>
> @@ -441,6 +442,15 @@ static inline void bio_wouldblock_error(struct bio *bio)
> bio_endio(bio);
> }
>
> +/*
> + * Calculate number of bvec segments that should be allocated to fit data
> + * pointed by @iter.
> + */
> +static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
> +{
> + return iov_iter_npages(iter, max_segs);
> +}
> +
> struct request_queue;
>
> extern int submit_bio_wait(struct bio *bio);
> --
> 2.24.0
>

Reviewed-by: Ming Lei <[email protected]>

--
Ming

2021-01-11 03:03:31

by Ming Lei

[permalink] [raw]
Subject: Re: [PATCH v3 7/7] bio: don't copy bvec for direct IO

On Sat, Jan 09, 2021 at 04:03:03PM +0000, Pavel Begunkov wrote:
> The block layer spends quite a while in blkdev_direct_IO() to copy and
> initialise bio's bvec. However, if we've already got a bvec in the input
> iterator it might be reused in some cases, i.e. when new
> ITER_BVEC_FLAG_FIXED flag is set. Simple tests show considerable
> performance boost, and it also reduces memory footprint.
>
> Suggested-by: Matthew Wilcox <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> Documentation/filesystems/porting.rst | 9 ++++
> block/bio.c | 67 ++++++++++++---------------
> include/linux/bio.h | 5 +-
> 3 files changed, 42 insertions(+), 39 deletions(-)
>
> diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
> index c722d94f29ea..1f8cf8e10b34 100644
> --- a/Documentation/filesystems/porting.rst
> +++ b/Documentation/filesystems/porting.rst
> @@ -872,3 +872,12 @@ its result is kern_unmount() or kern_unmount_array().
>
> zero-length bvec segments are disallowed, they must be filtered out before
> passed on to an iterator.
> +
> +---
> +
> +**mandatory**
> +
> +For bvec based itererators bio_iov_iter_get_pages() now doesn't copy bvecs but
> +uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
> +page references stay until I/O has completed, i.e. until ->ki_complete() has
> +been called or returned with non -EIOCBQUEUED code.
> diff --git a/block/bio.c b/block/bio.c
> index 9f26984af643..6f031a04b59a 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -960,21 +960,17 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
> }
> EXPORT_SYMBOL_GPL(bio_release_pages);
>
> -static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
> +static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
> {
> - const struct bio_vec *bv = iter->bvec;
> - unsigned int len;
> - size_t size;
> -
> - if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
> - return -EINVAL;
> -
> - len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
> - size = bio_add_page(bio, bv->bv_page, len,
> - bv->bv_offset + iter->iov_offset);
> - if (unlikely(size != len))
> - return -EINVAL;
> - iov_iter_advance(iter, size);
> + WARN_ON_ONCE(BVEC_POOL_IDX(bio) != 0);
> +
> + bio->bi_vcnt = iter->nr_segs;
> + bio->bi_max_vecs = iter->nr_segs;
> + bio->bi_io_vec = (struct bio_vec *)iter->bvec;
> + bio->bi_iter.bi_bvec_done = iter->iov_offset;
> + bio->bi_iter.bi_size = iter->count;
> +
> + iov_iter_advance(iter, iter->count);
> return 0;
> }
>
> @@ -1088,12 +1084,12 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
> * This takes either an iterator pointing to user memory, or one pointing to
> * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
> * map them into the kernel. On IO completion, the caller should put those
> - * pages. If we're adding kernel pages, and the caller told us it's safe to
> - * do so, we just have to add the pages to the bio directly. We don't grab an
> - * extra reference to those pages (the user should already have that), and we
> - * don't put the page on IO completion. The caller needs to check if the bio is
> - * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
> - * released.
> + * pages. For bvec based iterators bio_iov_iter_get_pages() uses the provided
> + * bvecs rather than copying them. Hence anyone issuing kiocb based IO needs
> + * to ensure the bvecs and pages stay referenced until the submitted I/O is
> + * completed by a call to ->ki_complete() or returns with an error other than
> + * -EIOCBQUEUED. The caller needs to check if the bio is flagged BIO_NO_PAGE_REF
> + * on IO completion. If it isn't, then pages should be released.
> *
> * The function tries, but does not guarantee, to pin as many pages as
> * fit into the bio, or are requested in @iter, whatever is smaller. If
> @@ -1105,27 +1101,22 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
> */
> int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
> {
> - const bool is_bvec = iov_iter_is_bvec(iter);
> - int ret;
> -
> - if (WARN_ON_ONCE(bio->bi_vcnt))
> - return -EINVAL;
> + int ret = 0;
>
> - do {
> - if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> - if (WARN_ON_ONCE(is_bvec))
> - return -EINVAL;
> - ret = __bio_iov_append_get_pages(bio, iter);
> - } else {
> - if (is_bvec)
> - ret = __bio_iov_bvec_add_pages(bio, iter);
> + if (iov_iter_is_bvec(iter)) {
> + if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
> + return -EINVAL;
> + bio_iov_bvec_set(bio, iter);
> + bio_set_flag(bio, BIO_NO_PAGE_REF);
> + return 0;
> + } else {
> + do {
> + if (bio_op(bio) == REQ_OP_ZONE_APPEND)
> + ret = __bio_iov_append_get_pages(bio, iter);
> else
> ret = __bio_iov_iter_get_pages(bio, iter);
> - }
> - } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
> -
> - if (is_bvec)
> - bio_set_flag(bio, BIO_NO_PAGE_REF);
> + } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
> + }
>
> /* don't account direct I/O as memory stall */
> bio_clear_flag(bio, BIO_WORKINGSET);
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index d8f9077c43ef..1d30572a8c53 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -444,10 +444,13 @@ static inline void bio_wouldblock_error(struct bio *bio)
>
> /*
> * Calculate number of bvec segments that should be allocated to fit data
> - * pointed by @iter.
> + * pointed by @iter. If @iter is backed by bvec it's going to be reused
> + * instead of allocating a new one.
> */
> static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
> {
> + if (iov_iter_is_bvec(iter))
> + return 0;
> return iov_iter_npages(iter, max_segs);
> }
>
> --
> 2.24.0
>

Reviewed-by: Ming Lei <[email protected]>

--
Ming

2021-01-26 07:44:41

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH v3 0/7] no-copy bvec

On 1/9/21 9:02 AM, Pavel Begunkov wrote:
> Currently, when iomap and block direct IO gets a bvec based iterator
> the bvec will be copied, with all other accounting that takes much
> CPU time and causes additional allocation for larger bvecs. The
> patchset makes it to reuse the passed in iter bvec.

Applied, thanks.

--
Jens Axboe