Hi,
Interests[1] have been shown in multipage bvecs, so this patchset
try to prepare for the support and do two things:
1) the 1st 4 patches use bvec iterator to implement iterate_bvec(),
then we can drop the non-standard way for iterating bvec, which
can be thought as a good cleanup for lib/iov_iter.c
2) remove BIO_MAX_SECTORS & BIO_MAX_SIZE, and now there is only
one user for each. Once multipage bvecs is introduced, one bio
may hold lots of sectors, and we should always use sort of BIO_MAX_VECS
which should be introduced in future and is similiar with current
BIO_MAX_PAGES.
xfstests(-a auto) have been run over ext4/xfs and no regression found
by this patchset.
V4:
- make xfstests cover xfs
- rebase on for-next of block tree
V3:
- include kenrel.h & bug.h in bvec.h for fix comiling failure on arm
as reported by 0day ktest
- build test on arm & arm64
V2:
- rename bvec_iter.h as bvec.h
- always include bvec.h into blk_types.h as suggested by Christoph
V1:
- don't move BIO_MAX_* to bvec_iter.h as pointed out by Christoph
- run xfstests against v4.6-rc1-next-20160329
- add Reviewed-by
- for 1,4 and 5, Reviewd-by not added, Christoph still expressed
'this looks fine to me.'
Ming Lei (10):
block: move bvec iterator into include/linux/bvec.h
block: move two bvec structure into bvec.h
block: mark 1st parameter of bvec_iter_advance as const
iov_iter: use bvec iterator to implement iterate_bvec()
fs: xfs: replace BIO_MAX_SECTORS with BIO_MAX_PAGES
block: bio: remove BIO_MAX_SECTORS
block: drbd: avoid to use BIO_MAX_SIZE
block: bio: remove BIO_MAX_SIZE
block: make sure big bio is splitted into at most 256 bvecs
block: allow to merge bios if splitting because of big bvecs num
block/blk-merge.c | 35 ++++++++++++++--
drivers/block/drbd/drbd_int.h | 4 +-
fs/xfs/xfs_buf.c | 4 +-
include/linux/bio.h | 52 -----------------------
include/linux/blk_types.h | 22 +---------
include/linux/bvec.h | 96 +++++++++++++++++++++++++++++++++++++++++++
lib/iov_iter.c | 30 +++++---------
7 files changed, 142 insertions(+), 101 deletions(-)
create mode 100644 include/linux/bvec.h
--
1.9.1
bvec iterator helpers should be used to implement by
iterate_bvec():lib/iov_iter.c too, and move them into
one header, so that we can keep bvec iterator header
out of CONFIG_BLOCK. Then we can remove the reinventing
of wheel in iterate_bvec().
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/bio.h | 51 +-----------------------------------
include/linux/bvec.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+), 50 deletions(-)
create mode 100644 include/linux/bvec.h
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6b7481f..1c06184 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -31,6 +31,7 @@
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
+#include <linux/bvec.h>
#define BIO_DEBUG
@@ -57,29 +58,6 @@
(bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \
} while (0)
-/*
- * various member access, note that bio_data should of course not be used
- * on highmem page vectors
- */
-#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
-
-#define bvec_iter_page(bvec, iter) \
- (__bvec_iter_bvec((bvec), (iter))->bv_page)
-
-#define bvec_iter_len(bvec, iter) \
- min((iter).bi_size, \
- __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
-
-#define bvec_iter_offset(bvec, iter) \
- (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
-
-#define bvec_iter_bvec(bvec, iter) \
-((struct bio_vec) { \
- .bv_page = bvec_iter_page((bvec), (iter)), \
- .bv_len = bvec_iter_len((bvec), (iter)), \
- .bv_offset = bvec_iter_offset((bvec), (iter)), \
-})
-
#define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter))
@@ -193,33 +171,6 @@ static inline void *bio_data(struct bio *bio)
#define bio_for_each_segment_all(bvl, bio, i) \
for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
-static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
- unsigned bytes)
-{
- WARN_ONCE(bytes > iter->bi_size,
- "Attempted to advance past end of bvec iter\n");
-
- while (bytes) {
- unsigned len = min(bytes, bvec_iter_len(bv, *iter));
-
- bytes -= len;
- iter->bi_size -= len;
- iter->bi_bvec_done += len;
-
- if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
- iter->bi_bvec_done = 0;
- iter->bi_idx++;
- }
- }
-}
-
-#define for_each_bvec(bvl, bio_vec, iter, start) \
- for (iter = (start); \
- (iter).bi_size && \
- ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
- bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
-
-
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes)
{
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
new file mode 100644
index 0000000..29c459d
--- /dev/null
+++ b/include/linux/bvec.h
@@ -0,0 +1,74 @@
+/*
+ * bvec iterator
+ *
+ * Copyright (C) 2001 Ming Lei <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+#ifndef __LINUX_BVEC_ITER_H
+#define __LINUX_BVEC_ITER_H
+
+#include <linux/blk_types.h>
+
+/*
+ * various member access, note that bio_data should of course not be used
+ * on highmem page vectors
+ */
+#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
+
+#define bvec_iter_page(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_page)
+
+#define bvec_iter_len(bvec, iter) \
+ min((iter).bi_size, \
+ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
+
+#define bvec_iter_offset(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+
+#define bvec_iter_bvec(bvec, iter) \
+((struct bio_vec) { \
+ .bv_page = bvec_iter_page((bvec), (iter)), \
+ .bv_len = bvec_iter_len((bvec), (iter)), \
+ .bv_offset = bvec_iter_offset((bvec), (iter)), \
+})
+
+static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
+ unsigned bytes)
+{
+ WARN_ONCE(bytes > iter->bi_size,
+ "Attempted to advance past end of bvec iter\n");
+
+ while (bytes) {
+ unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+
+ bytes -= len;
+ iter->bi_size -= len;
+ iter->bi_bvec_done += len;
+
+ if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+ iter->bi_bvec_done = 0;
+ iter->bi_idx++;
+ }
+ }
+}
+
+#define for_each_bvec(bvl, bio_vec, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+
+#endif /* __LINUX_BVEC_ITER_H */
--
1.9.1
This patch moves 'struct bio_vec' and 'struct bvec_iter'
into 'include/linux/bvec.h', then always include this header
into 'include/linux/blk_types.h'.
With this change, both 'struct bvec_iter' and bvec iterator
helpers don't depend on CONFIG_BLOCK any more, then we can
use bvec iterator to implement iterate_bvec(): lib/iov_iter.c.
Suggested-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/bio.h | 1 -
include/linux/blk_types.h | 22 +---------------------
include/linux/bvec.h | 23 ++++++++++++++++++++++-
3 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1c06184..96835a0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -31,7 +31,6 @@
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
-#include <linux/bvec.h>
#define BIO_DEBUG
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 86a38ea..a8d8e1f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -6,6 +6,7 @@
#define __LINUX_BLK_TYPES_H
#include <linux/types.h>
+#include <linux/bvec.h>
struct bio_set;
struct bio;
@@ -17,28 +18,7 @@ struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
typedef void (bio_destructor_t) (struct bio *);
-/*
- * was unsigned short, but we might as well be ready for > 64kB I/O pages
- */
-struct bio_vec {
- struct page *bv_page;
- unsigned int bv_len;
- unsigned int bv_offset;
-};
-
#ifdef CONFIG_BLOCK
-
-struct bvec_iter {
- sector_t bi_sector; /* device address in 512 byte
- sectors */
- unsigned int bi_size; /* residual I/O count */
-
- unsigned int bi_idx; /* current index into bvl_vec */
-
- unsigned int bi_bvec_done; /* number of bytes completed in
- current bvec */
-};
-
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 29c459d..096efd2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -20,7 +20,28 @@
#ifndef __LINUX_BVEC_ITER_H
#define __LINUX_BVEC_ITER_H
-#include <linux/blk_types.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+ struct page *bv_page;
+ unsigned int bv_len;
+ unsigned int bv_offset;
+};
+
+struct bvec_iter {
+ sector_t bi_sector; /* device address in 512 byte
+ sectors */
+ unsigned int bi_size; /* residual I/O count */
+
+ unsigned int bi_idx; /* current index into bvl_vec */
+
+ unsigned int bi_bvec_done; /* number of bytes completed in
+ current bvec */
+};
/*
* various member access, note that bio_data should of course not be used
--
1.9.1
bvec has provided one iterator already, so not necessary
to reinvent a new wheel for this job.
Signed-off-by: Ming Lei <[email protected]>
---
lib/iov_iter.c | 30 ++++++++++--------------------
1 file changed, 10 insertions(+), 20 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5fecddc..c8691ac 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -57,35 +57,25 @@
}
#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
+ struct bvec_iter __bi, __start; \
+ __start.bi_size = n; \
+ __start.bi_bvec_done = skip; \
+ __start.bi_idx = 0; \
__p = i->bvec; \
- __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
- if (likely(__v.bv_len)) { \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset + skip; \
+ for_each_bvec(__v, __p, __bi, __start) { \
(void)(STEP); \
- skip += __v.bv_len; \
- n -= __v.bv_len; \
} \
- while (unlikely(n)) { \
- __p++; \
- __v.bv_len = min_t(size_t, n, __p->bv_len); \
- if (unlikely(!__v.bv_len)) \
- continue; \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset; \
- (void)(STEP); \
+ if (!__bi.bi_idx) \
+ skip += __v.bv_len; \
+ else \
skip = __v.bv_len; \
- n -= __v.bv_len; \
- } \
- n = wanted; \
}
#define iterate_all_kinds(i, n, v, I, B, K) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
const struct bio_vec *bvec; \
- struct bio_vec v; \
+ struct bio_vec v = { 0 }; \
iterate_bvec(i, n, v, bvec, skip, (B)) \
} else if (unlikely(i->type & ITER_KVEC)) { \
const struct kvec *kvec; \
@@ -102,7 +92,7 @@
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
const struct bio_vec *bvec; \
- struct bio_vec v; \
+ struct bio_vec v = { 0 }; \
iterate_bvec(i, n, v, bvec, skip, (B)) \
if (skip == bvec->bv_len) { \
bvec++; \
--
1.9.1
BIO_MAX_PAGES is used as maximum count of bvecs, so
replace BIO_MAX_SECTORS with BIO_MAX_PAGES since
BIO_MAX_SECTORS is to be removed.
Signed-off-by: Ming Lei <[email protected]>
---
fs/xfs/xfs_buf.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9a2191b..b9ecb2d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1161,9 +1161,7 @@ xfs_buf_ioapply_map(
next_chunk:
atomic_inc(&bp->b_io_remaining);
- nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
- if (nr_pages > total_nr_pages)
- nr_pages = total_nr_pages;
+ nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, nr_pages);
bio->bi_bdev = bp->b_target->bt_bdev;
--
1.9.1
No one need this macro now, so remove it. The motivation is
for supporting multipage bvecs, in which we only know
what the max count of bvecs is supported in the bio,
instead of max bio size.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/bio.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d9beea7..c8553fe 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -41,7 +41,6 @@
#endif
#define BIO_MAX_PAGES 256
-#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT)
/*
* upper 16 bits of bi_rw define the io priority of this bio
--
1.9.1
No one need this macro, so remove it. The motivation is
for supporting multipage bvecs, in which we only know
what the max count of bvecs is supported in the bio,
instead of max size or max sectors.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/bio.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 96835a0..d9beea7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -42,7 +42,6 @@
#define BIO_MAX_PAGES 256
#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT)
-#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9)
/*
* upper 16 bits of bi_rw define the io priority of this bio
--
1.9.1
drbd is the only user of BIO_MAX_SIZE, so use BIO_MAX_PAGES
instead.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
drivers/block/drbd/drbd_int.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 7a1cf7e..e6e4b08 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1327,14 +1327,14 @@ struct bm_extent {
#endif
#endif
-/* BIO_MAX_SIZE is 256 * PAGE_SIZE,
+/* Estimate max bio size as 256 * PAGE_SIZE,
* so for typical PAGE_SIZE of 4k, that is (1<<20) Byte.
* Since we may live in a mixed-platform cluster,
* we limit us to a platform agnostic constant here for now.
* A followup commit may allow even bigger BIO sizes,
* once we thought that through. */
#define DRBD_MAX_BIO_SIZE (1U << 20)
-#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT)
#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
#endif
#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */
--
1.9.1
bvec_iter_advance() only writes the parameter of iterator,
so the base address of bvec can be marked as const safely.
Without the change, we can see compiling warning in the
following patch for implementing iterate_bvec(): lib/iov_iter.c
with bvec iterator.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
---
include/linux/bvec.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 096efd2..701b64a 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -66,7 +66,8 @@ struct bvec_iter {
.bv_offset = bvec_iter_offset((bvec), (iter)), \
})
-static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
+static inline void bvec_iter_advance(const struct bio_vec *bv,
+ struct bvec_iter *iter,
unsigned bytes)
{
WARN_ONCE(bytes > iter->bi_size,
--
1.9.1
On 04/14/2016 05:46 AM, Ming Lei wrote:
> Hi,
>
> Interests[1] have been shown in multipage bvecs, so this patchset
> try to prepare for the support and do two things:
>
> 1) the 1st 4 patches use bvec iterator to implement iterate_bvec(),
> then we can drop the non-standard way for iterating bvec, which
> can be thought as a good cleanup for lib/iov_iter.c
>
> 2) remove BIO_MAX_SECTORS & BIO_MAX_SIZE, and now there is only
> one user for each. Once multipage bvecs is introduced, one bio
> may hold lots of sectors, and we should always use sort of BIO_MAX_VECS
> which should be introduced in future and is similiar with current
> BIO_MAX_PAGES.
>
> xfstests(-a auto) have been run over ext4/xfs and no regression found
> by this patchset.
We've had too many disasters in the block layer the last few series, I'm
making the 4.7 round a nice and small one. I don't mind taking prep
patches for the multipage bvecs, if they are simple and clean, but
that's about the extent of it.
Just a heads up.
--
Jens Axboe
On Thu, Apr 14, 2016 at 10:11 PM, Jens Axboe <[email protected]> wrote:
> On 04/14/2016 05:46 AM, Ming Lei wrote:
>>
>> Hi,
>>
>> Interests[1] have been shown in multipage bvecs, so this patchset
>> try to prepare for the support and do two things:
>>
>> 1) the 1st 4 patches use bvec iterator to implement iterate_bvec(),
>> then we can drop the non-standard way for iterating bvec, which
>> can be thought as a good cleanup for lib/iov_iter.c
>>
>> 2) remove BIO_MAX_SECTORS & BIO_MAX_SIZE, and now there is only
>> one user for each. Once multipage bvecs is introduced, one bio
>> may hold lots of sectors, and we should always use sort of BIO_MAX_VECS
>> which should be introduced in future and is similiar with current
>> BIO_MAX_PAGES.
>>
>> xfstests(-a auto) have been run over ext4/xfs and no regression found
>> by this patchset.
>
>
> We've had too many disasters in the block layer the last few series, I'm
> making the 4.7 round a nice and small one. I don't mind taking prep patches
> for the multipage bvecs, if they are simple and clean, but that's about the
> extent of it.
>
> Just a heads up.
Jens, thanks for your response, and understood your concerns.
Please hold on this patchset, and I will check it further and make it
better.
Thanks,