2023-12-12 12:38:29

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 00/13] btrfs: clean up RAID I/O geometry calculation

The calculation of the RAID I/O geometry in btrfs_map_block has been a maze of
if-else statements for a very long time and the advent of the
raid-stripe-tree made the situation even worse.

This patchset refactors btrfs_map_block() to untagle the maze and make I/O
geometry setting easier to follow, but does not introduce any functional
changes.

I've also run it through Josef's CI and there have been test failures, but
none of them introduced by these patches.

---
Johannes Thumshirn (13):
btrfs: factor out helper for single device IO check
btrfs: re-introduce struct btrfs_io_geometry
btrfs: factor out block-mapping for RAID0
btrfs: factor out RAID1 block mapping
btrfs: factor out block mapping for DUP profiles
btrfs: factor out block mapping for RAID10
btrfs: reduce scope of data_stripes in btrfs_map_block
btrfs: factor out block mapping for RAID5/6
btrfs: factor out block mapping for single profiles
btrfs: untagle if else maze in btrfs_map_block
btrfs: open code set_io_stripe for RAID56
btrfs: pass struct btrfs_io_geometry to set_io_stripe
btrfs: pass btrfs_io_geometry into btrfs_max_io_len

fs/btrfs/volumes.c | 388 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 245 insertions(+), 143 deletions(-)
---
base-commit: 14d1d39586246ca9d4ce97049c98be849e3bbcd9
change-id: 20231207-btrfs_map_block-cleanup-346f53aff90d

Best regards,
--
Johannes Thumshirn <[email protected]>


2023-12-12 12:38:41

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 03/13] btrfs: factor out block-mapping for RAID0

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of RAID0, factor out a helper calculating
this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d0e529b3fd39..a5d85a77da25 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6362,6 +6362,16 @@ static bool is_single_device_io(struct btrfs_fs_info *fs_info,
return true;
}

+static void map_blocks_for_raid0(struct btrfs_chunk_map *map,
+ enum btrfs_map_op op,
+ struct btrfs_io_geometry *io_geom)
+{
+ io_geom->stripe_index = io_geom->stripe_nr % map->num_stripes;
+ io_geom->stripe_nr /= map->num_stripes;
+ if (op == BTRFS_MAP_READ)
+ io_geom->mirror_num = 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6447,10 +6457,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom.stripe_index = 0;
io_geom.mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- io_geom.stripe_index = io_geom.stripe_nr % map->num_stripes;
- io_geom.stripe_nr /= map->num_stripes;
- if (op == BTRFS_MAP_READ)
- io_geom.mirror_num = 1;
+ map_blocks_for_raid0(map, op, &io_geom);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
if (op != BTRFS_MAP_READ) {
io_geom.num_stripes = map->num_stripes;

--
2.43.0

2023-12-12 12:38:43

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 01/13] btrfs: factor out helper for single device IO check

The check in btrfs_map_block() deciding if a particular I/O is targeting a
single device is getting more and more convoluted.

Factor out the check conditions into a helper function, with no functional
change otherwise.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1cc6b5d5eb61..1011178a244c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6330,6 +6330,28 @@ static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
return 0;
}

+static bool is_single_device_io(struct btrfs_fs_info *fs_info,
+ struct btrfs_io_stripe *smap,
+ struct btrfs_chunk_map *map,
+ int num_alloc_stripes,
+ enum btrfs_map_op op, int mirror_num)
+{
+ if (!smap)
+ return false;
+
+ if (num_alloc_stripes != 1)
+ return false;
+
+ if (btrfs_need_stripe_tree_update(fs_info, map->type) &&
+ op != BTRFS_MAP_READ)
+ return false;
+
+ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)
+ return false;
+
+ return true;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6532,10 +6554,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* physical block information on the stack instead of allocating an
* I/O context structure.
*/
- if (smap && num_alloc_stripes == 1 &&
- !(btrfs_need_stripe_tree_update(fs_info, map->type) &&
- op != BTRFS_MAP_READ) &&
- !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
+ if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op,
+ mirror_num)) {
ret = set_io_stripe(fs_info, op, logical, length, smap, map,
stripe_index, stripe_offset, stripe_nr);
if (mirror_num_ret)

--
2.43.0

2023-12-12 12:38:45

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 02/13] btrfs: re-introduce struct btrfs_io_geometry

Re-introduce struct btrfs_io_geometry, holding the necessary bits and
pieces needed in btrfs_map_block() to decide the I/O geometry of a specific
block mapping.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 156 +++++++++++++++++++++++++++++------------------------
1 file changed, 86 insertions(+), 70 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1011178a244c..d0e529b3fd39 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -41,6 +41,16 @@
BTRFS_BLOCK_GROUP_RAID10 | \
BTRFS_BLOCK_GROUP_RAID56_MASK)

+struct btrfs_io_geometry {
+ u32 stripe_index;
+ u32 stripe_nr;
+ int mirror_num;
+ int num_stripes;
+ u64 stripe_offset;
+ u64 raid56_full_stripe_start;
+ int max_errors;
+};
+
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
.sub_stripes = 2,
@@ -6393,28 +6403,22 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_io_stripe *smap, int *mirror_num_ret)
{
struct btrfs_chunk_map *map;
+ struct btrfs_io_geometry io_geom = { };
u64 map_offset;
- u64 stripe_offset;
- u32 stripe_nr;
- u32 stripe_index;
int data_stripes;
int i;
int ret = 0;
- int mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
- int num_stripes;
int num_copies;
- int max_errors = 0;
struct btrfs_io_context *bioc = NULL;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int dev_replace_is_ongoing = 0;
u16 num_alloc_stripes;
- u64 raid56_full_stripe_start = (u64)-1;
u64 max_len;

ASSERT(bioc_ret);

num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
- if (mirror_num > num_copies)
+ if (io_geom.mirror_num > num_copies)
return -EINVAL;

map = btrfs_get_chunk_map(fs_info, logical, *length);
@@ -6424,8 +6428,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
data_stripes = nr_data_stripes(map);

map_offset = logical - map->start;
- max_len = btrfs_max_io_len(map, op, map_offset, &stripe_nr,
- &stripe_offset, &raid56_full_stripe_start);
+ io_geom.raid56_full_stripe_start = (u64)-1;
+ max_len = btrfs_max_io_len(map, op, map_offset, &io_geom.stripe_nr,
+ &io_geom.stripe_offset,
+ &io_geom.raid56_full_stripe_start);
*length = min_t(u64, map->chunk_len - map_offset, max_len);

down_read(&dev_replace->rwsem);
@@ -6437,53 +6443,54 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (!dev_replace_is_ongoing)
up_read(&dev_replace->rwsem);

- num_stripes = 1;
- stripe_index = 0;
+ io_geom.num_stripes = 1;
+ io_geom.stripe_index = 0;
+ io_geom.mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- stripe_index = stripe_nr % map->num_stripes;
- stripe_nr /= map->num_stripes;
+ io_geom.stripe_index = io_geom.stripe_nr % map->num_stripes;
+ io_geom.stripe_nr /= map->num_stripes;
if (op == BTRFS_MAP_READ)
- mirror_num = 1;
+ io_geom.mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
if (op != BTRFS_MAP_READ) {
- num_stripes = map->num_stripes;
- } else if (mirror_num) {
- stripe_index = mirror_num - 1;
+ io_geom.num_stripes = map->num_stripes;
+ } else if (io_geom.mirror_num) {
+ io_geom.stripe_index = io_geom.mirror_num - 1;
} else {
- stripe_index = find_live_mirror(fs_info, map, 0,
+ io_geom.stripe_index = find_live_mirror(fs_info, map, 0,
dev_replace_is_ongoing);
- mirror_num = stripe_index + 1;
+ io_geom.mirror_num = io_geom.stripe_index + 1;
}

} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
if (op != BTRFS_MAP_READ) {
- num_stripes = map->num_stripes;
- } else if (mirror_num) {
- stripe_index = mirror_num - 1;
+ io_geom.num_stripes = map->num_stripes;
+ } else if (io_geom.mirror_num) {
+ io_geom.stripe_index = io_geom.mirror_num - 1;
} else {
- mirror_num = 1;
+ io_geom.mirror_num = 1;
}

} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
u32 factor = map->num_stripes / map->sub_stripes;

- stripe_index = (stripe_nr % factor) * map->sub_stripes;
- stripe_nr /= factor;
+ io_geom.stripe_index = (io_geom.stripe_nr % factor) * map->sub_stripes;
+ io_geom.stripe_nr /= factor;

if (op != BTRFS_MAP_READ)
- num_stripes = map->sub_stripes;
- else if (mirror_num)
- stripe_index += mirror_num - 1;
+ io_geom.num_stripes = map->sub_stripes;
+ else if (io_geom.mirror_num)
+ io_geom.stripe_index += io_geom.mirror_num - 1;
else {
- int old_stripe_index = stripe_index;
- stripe_index = find_live_mirror(fs_info, map,
- stripe_index,
+ int old_stripe_index = io_geom.stripe_index;
+ io_geom.stripe_index = find_live_mirror(fs_info, map,
+ io_geom.stripe_index,
dev_replace_is_ongoing);
- mirror_num = stripe_index - old_stripe_index + 1;
+ io_geom.mirror_num = io_geom.stripe_index - old_stripe_index + 1;
}

} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- if (op != BTRFS_MAP_READ || mirror_num > 1) {
+ if (op != BTRFS_MAP_READ || io_geom.mirror_num > 1) {
/*
* Needs full stripe mapping.
*
@@ -6495,29 +6502,33 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* but that can be expensive. Here we just divide
* @stripe_nr with @data_stripes.
*/
- stripe_nr /= data_stripes;
+ io_geom.stripe_nr /= data_stripes;

/* RAID[56] write or recovery. Return all stripes */
- num_stripes = map->num_stripes;
- max_errors = btrfs_chunk_max_errors(map);
+ io_geom.num_stripes = map->num_stripes;
+ io_geom.max_errors = btrfs_chunk_max_errors(map);

/* Return the length to the full stripe end */
*length = min(logical + *length,
- raid56_full_stripe_start + map->start +
- btrfs_stripe_nr_to_offset(data_stripes)) -
+ io_geom.raid56_full_stripe_start +
+ map->start +
+ btrfs_stripe_nr_to_offset(
+ data_stripes)) -
logical;
- stripe_index = 0;
- stripe_offset = 0;
+ io_geom.stripe_index = 0;
+ io_geom.stripe_offset = 0;
} else {
- ASSERT(mirror_num <= 1);
+ ASSERT(io_geom.mirror_num <= 1);
/* Just grab the data stripe directly. */
- stripe_index = stripe_nr % data_stripes;
- stripe_nr /= data_stripes;
+ io_geom.stripe_index = io_geom.stripe_nr % data_stripes;
+ io_geom.stripe_nr /= data_stripes;

/* We distribute the parity blocks across stripes */
- stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
- if (op == BTRFS_MAP_READ && mirror_num < 1)
- mirror_num = 1;
+ io_geom.stripe_index =
+ (io_geom.stripe_nr + io_geom.stripe_index) %
+ map->num_stripes;
+ if (op == BTRFS_MAP_READ && io_geom.mirror_num < 1)
+ io_geom.mirror_num = 1;
}
} else {
/*
@@ -6525,19 +6536,19 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* device we have to walk to find the data, and stripe_index is
* the number of our device in the stripe array
*/
- stripe_index = stripe_nr % map->num_stripes;
- stripe_nr /= map->num_stripes;
- mirror_num = stripe_index + 1;
+ io_geom.stripe_index = io_geom.stripe_nr % map->num_stripes;
+ io_geom.stripe_nr /= map->num_stripes;
+ io_geom.mirror_num = io_geom.stripe_index + 1;
}
- if (stripe_index >= map->num_stripes) {
+ if (io_geom.stripe_index >= map->num_stripes) {
btrfs_crit(fs_info,
"stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
- stripe_index, map->num_stripes);
+ io_geom.stripe_index, map->num_stripes);
ret = -EINVAL;
goto out;
}

- num_alloc_stripes = num_stripes;
+ num_alloc_stripes = io_geom.num_stripes;
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
op != BTRFS_MAP_READ)
/*
@@ -6555,11 +6566,12 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* I/O context structure.
*/
if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op,
- mirror_num)) {
+ io_geom.mirror_num)) {
ret = set_io_stripe(fs_info, op, logical, length, smap, map,
- stripe_index, stripe_offset, stripe_nr);
+ io_geom.stripe_index, io_geom.stripe_offset,
+ io_geom.stripe_nr);
if (mirror_num_ret)
- *mirror_num_ret = mirror_num;
+ *mirror_num_ret = io_geom.mirror_num;
*bioc_ret = NULL;
goto out;
}
@@ -6579,7 +6591,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* It's still mostly the same as other profiles, just with extra rotation.
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
- (op != BTRFS_MAP_READ || mirror_num > 1)) {
+ (op != BTRFS_MAP_READ || io_geom.mirror_num > 1)) {
/*
* For RAID56 @stripe_nr is already the number of full stripes
* before us, which is also the rotation value (needs to modulo
@@ -6589,12 +6601,13 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* modulo, to reduce one modulo call.
*/
bioc->full_stripe_logical = map->start +
- btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
- for (int i = 0; i < num_stripes; i++) {
+ btrfs_stripe_nr_to_offset(io_geom.stripe_nr * data_stripes);
+ for (int i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, op, logical, length,
&bioc->stripes[i], map,
- (i + stripe_nr) % num_stripes,
- stripe_offset, stripe_nr);
+ (i + io_geom.stripe_nr) % io_geom.num_stripes,
+ io_geom.stripe_offset,
+ io_geom.stripe_nr);
if (ret < 0)
break;
}
@@ -6603,13 +6616,15 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
- for (i = 0; i < num_stripes; i++) {
+ for (i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, op, logical, length,
- &bioc->stripes[i], map, stripe_index,
- stripe_offset, stripe_nr);
+ &bioc->stripes[i], map,
+ io_geom.stripe_index,
+ io_geom.stripe_offset,
+ io_geom.stripe_nr);
if (ret < 0)
break;
- stripe_index++;
+ io_geom.stripe_index++;
}
}

@@ -6620,18 +6635,19 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}

if (op != BTRFS_MAP_READ)
- max_errors = btrfs_chunk_max_errors(map);
+ io_geom.max_errors = btrfs_chunk_max_errors(map);

if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
op != BTRFS_MAP_READ) {
handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
- &num_stripes, &max_errors);
+ &io_geom.num_stripes,
+ &io_geom.max_errors);
}

*bioc_ret = bioc;
- bioc->num_stripes = num_stripes;
- bioc->max_errors = max_errors;
- bioc->mirror_num = mirror_num;
+ bioc->num_stripes = io_geom.num_stripes;
+ bioc->max_errors = io_geom.max_errors;
+ bioc->mirror_num = io_geom.mirror_num;

out:
if (dev_replace_is_ongoing) {

--
2.43.0

2023-12-12 12:38:55

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 04/13] btrfs: factor out RAID1 block mapping

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of RAID1, factor out a helper calculating
this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a5d85a77da25..f6f1e783b3c1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6372,6 +6372,25 @@ static void map_blocks_for_raid0(struct btrfs_chunk_map *map,
io_geom->mirror_num = 1;
}

+static void map_blocks_for_raid1(struct btrfs_fs_info *fs_info,
+ struct btrfs_chunk_map *map,
+ enum btrfs_map_op op,
+ struct btrfs_io_geometry *io_geom, int replace)
+{
+ if (op != BTRFS_MAP_READ) {
+ io_geom->num_stripes = map->num_stripes;
+ return;
+ }
+
+ if (io_geom->mirror_num) {
+ io_geom->stripe_index = io_geom->mirror_num - 1;
+ return;
+ }
+
+ io_geom->stripe_index = find_live_mirror(fs_info, map, 0, replace);
+ io_geom->mirror_num = io_geom->stripe_index + 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6459,16 +6478,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
map_blocks_for_raid0(map, op, &io_geom);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
- if (op != BTRFS_MAP_READ) {
- io_geom.num_stripes = map->num_stripes;
- } else if (io_geom.mirror_num) {
- io_geom.stripe_index = io_geom.mirror_num - 1;
- } else {
- io_geom.stripe_index = find_live_mirror(fs_info, map, 0,
- dev_replace_is_ongoing);
- io_geom.mirror_num = io_geom.stripe_index + 1;
- }
-
+ map_blocks_for_raid1(fs_info, map, op, &io_geom,
+ dev_replace_is_ongoing);
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
if (op != BTRFS_MAP_READ) {
io_geom.num_stripes = map->num_stripes;

--
2.43.0

2023-12-12 12:38:58

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 05/13] btrfs: factor out block mapping for DUP profiles

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of DUP, factor out a helper calculating
this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f6f1e783b3c1..b0a5c53fba51 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6391,6 +6391,23 @@ static void map_blocks_for_raid1(struct btrfs_fs_info *fs_info,
io_geom->mirror_num = io_geom->stripe_index + 1;
}

+static void map_blocks_for_dup(struct btrfs_chunk_map *map,
+ enum btrfs_map_op op,
+ struct btrfs_io_geometry *io_geom)
+{
+ if (op != BTRFS_MAP_READ) {
+ io_geom->num_stripes = map->num_stripes;
+ return;
+ }
+
+ if (io_geom->mirror_num) {
+ io_geom->stripe_index = io_geom->mirror_num - 1;
+ return;
+ }
+
+ io_geom->mirror_num = 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6481,14 +6498,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
map_blocks_for_raid1(fs_info, map, op, &io_geom,
dev_replace_is_ongoing);
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (op != BTRFS_MAP_READ) {
- io_geom.num_stripes = map->num_stripes;
- } else if (io_geom.mirror_num) {
- io_geom.stripe_index = io_geom.mirror_num - 1;
- } else {
- io_geom.mirror_num = 1;
- }
-
+ map_blocks_for_dup(map, op, &io_geom);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
u32 factor = map->num_stripes / map->sub_stripes;


--
2.43.0

2023-12-12 12:39:04

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 06/13] btrfs: factor out block mapping for RAID10

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of RAID10, factor out a helper calculating
this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 48 +++++++++++++++++++++++++++++++-----------------
1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b0a5c53fba51..9090bfc4fe38 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6408,6 +6408,35 @@ static void map_blocks_for_dup(struct btrfs_chunk_map *map,
io_geom->mirror_num = 1;
}

+static void map_blocks_for_raid10(struct btrfs_fs_info *fs_info,
+ struct btrfs_chunk_map *map,
+ enum btrfs_map_op op,
+ struct btrfs_io_geometry *io_geom,
+ int replace)
+{
+ u32 factor = map->num_stripes / map->sub_stripes;
+ int old_stripe_index;
+
+ io_geom->stripe_index =
+ (io_geom->stripe_nr % factor) * map->sub_stripes;
+ io_geom->stripe_nr /= factor;
+
+ if (op != BTRFS_MAP_READ) {
+ io_geom->num_stripes = map->sub_stripes;
+ return;
+ }
+
+ if (io_geom->mirror_num) {
+ io_geom->stripe_index += io_geom->mirror_num - 1;
+ return;
+ }
+
+ old_stripe_index = io_geom->stripe_index;
+ io_geom->stripe_index =
+ find_live_mirror(fs_info, map, io_geom->stripe_index, replace);
+ io_geom->mirror_num = io_geom->stripe_index - old_stripe_index + 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6500,23 +6529,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
map_blocks_for_dup(map, op, &io_geom);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- u32 factor = map->num_stripes / map->sub_stripes;
-
- io_geom.stripe_index = (io_geom.stripe_nr % factor) * map->sub_stripes;
- io_geom.stripe_nr /= factor;
-
- if (op != BTRFS_MAP_READ)
- io_geom.num_stripes = map->sub_stripes;
- else if (io_geom.mirror_num)
- io_geom.stripe_index += io_geom.mirror_num - 1;
- else {
- int old_stripe_index = io_geom.stripe_index;
- io_geom.stripe_index = find_live_mirror(fs_info, map,
- io_geom.stripe_index,
- dev_replace_is_ongoing);
- io_geom.mirror_num = io_geom.stripe_index - old_stripe_index + 1;
- }
-
+ map_blocks_for_raid10(fs_info, map, op, &io_geom,
+ dev_replace_is_ongoing);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (op != BTRFS_MAP_READ || io_geom.mirror_num > 1) {
/*

--
2.43.0

2023-12-12 12:39:08

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 07/13] btrfs: reduce scope of data_stripes in btrfs_map_block

Reduce the scope of 'data_stripes' in btrfs_map_block(). While the change
allone doesn't make too much sense, it helps us factoring out a helper
function for the block mapping of RAID56 I/O.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9090bfc4fe38..bc0988d8bd56 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6480,7 +6480,6 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_chunk_map *map;
struct btrfs_io_geometry io_geom = { };
u64 map_offset;
- int data_stripes;
int i;
int ret = 0;
int num_copies;
@@ -6500,8 +6499,6 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (IS_ERR(map))
return PTR_ERR(map);

- data_stripes = nr_data_stripes(map);
-
map_offset = logical - map->start;
io_geom.raid56_full_stripe_start = (u64)-1;
max_len = btrfs_max_io_len(map, op, map_offset, &io_geom.stripe_nr,
@@ -6532,6 +6529,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
map_blocks_for_raid10(fs_info, map, op, &io_geom,
dev_replace_is_ongoing);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ int data_stripes = nr_data_stripes(map);
+
if (op != BTRFS_MAP_READ || io_geom.mirror_num > 1) {
/*
* Needs full stripe mapping.
@@ -6643,7 +6642,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* modulo, to reduce one modulo call.
*/
bioc->full_stripe_logical = map->start +
- btrfs_stripe_nr_to_offset(io_geom.stripe_nr * data_stripes);
+ btrfs_stripe_nr_to_offset(io_geom.stripe_nr *
+ nr_data_stripes(map));
for (int i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, op, logical, length,
&bioc->stripes[i], map,

--
2.43.0

2023-12-12 12:39:16

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 08/13] btrfs: factor out block mapping for RAID5/6

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of RAID5 and RAID6, factor out a helper
calculating this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 91 +++++++++++++++++++++++++++++-------------------------
1 file changed, 49 insertions(+), 42 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bc0988d8bd56..fd213bb7d619 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6437,6 +6437,54 @@ static void map_blocks_for_raid10(struct btrfs_fs_info *fs_info,
io_geom->mirror_num = io_geom->stripe_index - old_stripe_index + 1;
}

+static void map_blocks_for_raid56(struct btrfs_chunk_map *map,
+ enum btrfs_map_op op,
+ struct btrfs_io_geometry *io_geom,
+ u64 logical, u64 *length)
+{
+ int data_stripes = nr_data_stripes(map);
+
+ if (op != BTRFS_MAP_READ || io_geom->mirror_num > 1) {
+ /*
+ * Needs full stripe mapping.
+ *
+ * Push stripe_nr back to the start of the full stripe
+ * For those cases needing a full stripe, @stripe_nr
+ * is the full stripe number.
+ *
+ * Originally we go raid56_full_stripe_start / full_stripe_len,
+ * but that can be expensive. Here we just divide
+ * @stripe_nr with @data_stripes.
+ */
+ io_geom->stripe_nr /= data_stripes;
+
+ /* RAID[56] write or recovery. Return all stripes */
+ io_geom->num_stripes = map->num_stripes;
+ io_geom->max_errors = btrfs_chunk_max_errors(map);
+
+ /* Return the length to the full stripe end */
+ *length = min(logical + *length,
+ io_geom->raid56_full_stripe_start + map->start +
+ btrfs_stripe_nr_to_offset(data_stripes)) -
+ logical;
+ io_geom->stripe_index = 0;
+ io_geom->stripe_offset = 0;
+ return;
+ }
+
+ ASSERT(io_geom->mirror_num <= 1);
+ /* Just grab the data stripe directly. */
+ io_geom->stripe_index = io_geom->stripe_nr % data_stripes;
+ io_geom->stripe_nr /= data_stripes;
+
+ /* We distribute the parity blocks across stripes */
+ io_geom->stripe_index =
+ (io_geom->stripe_nr + io_geom->stripe_index) % map->num_stripes;
+
+ if (op == BTRFS_MAP_READ && io_geom->mirror_num < 1)
+ io_geom->mirror_num = 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6529,48 +6577,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
map_blocks_for_raid10(fs_info, map, op, &io_geom,
dev_replace_is_ongoing);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- int data_stripes = nr_data_stripes(map);
-
- if (op != BTRFS_MAP_READ || io_geom.mirror_num > 1) {
- /*
- * Needs full stripe mapping.
- *
- * Push stripe_nr back to the start of the full stripe
- * For those cases needing a full stripe, @stripe_nr
- * is the full stripe number.
- *
- * Originally we go raid56_full_stripe_start / full_stripe_len,
- * but that can be expensive. Here we just divide
- * @stripe_nr with @data_stripes.
- */
- io_geom.stripe_nr /= data_stripes;
-
- /* RAID[56] write or recovery. Return all stripes */
- io_geom.num_stripes = map->num_stripes;
- io_geom.max_errors = btrfs_chunk_max_errors(map);
-
- /* Return the length to the full stripe end */
- *length = min(logical + *length,
- io_geom.raid56_full_stripe_start +
- map->start +
- btrfs_stripe_nr_to_offset(
- data_stripes)) -
- logical;
- io_geom.stripe_index = 0;
- io_geom.stripe_offset = 0;
- } else {
- ASSERT(io_geom.mirror_num <= 1);
- /* Just grab the data stripe directly. */
- io_geom.stripe_index = io_geom.stripe_nr % data_stripes;
- io_geom.stripe_nr /= data_stripes;
-
- /* We distribute the parity blocks across stripes */
- io_geom.stripe_index =
- (io_geom.stripe_nr + io_geom.stripe_index) %
- map->num_stripes;
- if (op == BTRFS_MAP_READ && io_geom.mirror_num < 1)
- io_geom.mirror_num = 1;
- }
+ map_blocks_for_raid56(map, op, &io_geom, logical, length);
} else {
/*
* After this, stripe_nr is the number of stripes on this

--
2.43.0

2023-12-12 12:39:22

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

Open code set_io_stripe() for RAID56, as it a) uses a different method to
calculate the stripe_index and b) doesn't need to go through raid-stripe-tree
mapping code.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 946333c8c331..7df991a81c4b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6670,13 +6670,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
btrfs_stripe_nr_to_offset(io_geom.stripe_nr *
nr_data_stripes(map));
for (int i = 0; i < io_geom.num_stripes; i++) {
- ret = set_io_stripe(fs_info, op, logical, length,
- &bioc->stripes[i], map,
- (i + io_geom.stripe_nr) % io_geom.num_stripes,
- io_geom.stripe_offset,
- io_geom.stripe_nr);
- if (ret < 0)
- break;
+ struct btrfs_io_stripe *dst = &bioc->stripes[i];
+ u32 stripe_index;
+
+ stripe_index =
+ (i + io_geom.stripe_nr) % io_geom.num_stripes;
+ dst->dev = map->stripes[stripe_index].dev;
+ dst->physical =
+ map->stripes[stripe_index].physical +
+ io_geom.stripe_offset +
+ btrfs_stripe_nr_to_offset(io_geom.stripe_nr);
}
} else {
/*

--
2.43.0

2023-12-12 12:39:34

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 09/13] btrfs: factor out block mapping for single profiles

Now that we have a container for the I/O geometry that has all the needed
information for the block mappings of SINGLE profiles, factor out a helper
calculating this information.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fd213bb7d619..e7bd3a25c516 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6485,6 +6485,14 @@ static void map_blocks_for_raid56(struct btrfs_chunk_map *map,
io_geom->mirror_num = 1;
}

+static void map_blocks_for_single(struct btrfs_chunk_map *map,
+ struct btrfs_io_geometry *io_geom)
+{
+ io_geom->stripe_index = io_geom->stripe_nr % map->num_stripes;
+ io_geom->stripe_nr /= map->num_stripes;
+ io_geom->mirror_num = io_geom->stripe_index + 1;
+}
+
/*
* Map one logical range to one or more physical ranges.
*
@@ -6584,9 +6592,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* device we have to walk to find the data, and stripe_index is
* the number of our device in the stripe array
*/
- io_geom.stripe_index = io_geom.stripe_nr % map->num_stripes;
- io_geom.stripe_nr /= map->num_stripes;
- io_geom.mirror_num = io_geom.stripe_index + 1;
+ map_blocks_for_single(map, &io_geom);
}
if (io_geom.stripe_index >= map->num_stripes) {
btrfs_crit(fs_info,

--
2.43.0

2023-12-12 12:39:34

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 13/13] btrfs: pass btrfs_io_geometry into btrfs_max_io_len

Instead of passing three individual members of 'struct btrfs_io_geometry'
into btrfs_max_io_len(), pass a pointer to btrfs_io_geometry.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c1fefe34a194..166750d279ee 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6277,16 +6277,15 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
}

static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, enum btrfs_map_op op,
- u64 offset, u32 *stripe_nr, u64 *stripe_offset,
- u64 *full_stripe_start)
+ u64 offset, struct btrfs_io_geometry *io_geom)
{
/*
* Stripe_nr is the stripe where this block falls. stripe_offset is
* the offset of this block in its stripe.
*/
- *stripe_offset = offset & BTRFS_STRIPE_LEN_MASK;
- *stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
- ASSERT(*stripe_offset < U32_MAX);
+ io_geom->stripe_offset = offset & BTRFS_STRIPE_LEN_MASK;
+ io_geom->stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
+ ASSERT(io_geom->stripe_offset < U32_MAX);

if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len =
@@ -6301,18 +6300,19 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, enum btrfs_map_op op,
* to go rounddown(), not round_down(), as nr_data_stripes is
* not ensured to be power of 2.
*/
- *full_stripe_start =
- btrfs_stripe_nr_to_offset(
- rounddown(*stripe_nr, nr_data_stripes(map)));
+ io_geom->raid56_full_stripe_start = btrfs_stripe_nr_to_offset(
+ rounddown(io_geom->stripe_nr, nr_data_stripes(map)));

- ASSERT(*full_stripe_start + full_stripe_len > offset);
- ASSERT(*full_stripe_start <= offset);
+ ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len >
+ offset);
+ ASSERT(io_geom->raid56_full_stripe_start <= offset);
/*
* For writes to RAID56, allow to write a full stripe set, but
* no straddling of stripe sets.
*/
if (op == BTRFS_MAP_WRITE)
- return full_stripe_len - (offset - *full_stripe_start);
+ return full_stripe_len -
+ (offset - io_geom->raid56_full_stripe_start);
}

/*
@@ -6320,7 +6320,7 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, enum btrfs_map_op op,
* a single disk).
*/
if (map->type & BTRFS_BLOCK_GROUP_STRIPE_MASK)
- return BTRFS_STRIPE_LEN - *stripe_offset;
+ return BTRFS_STRIPE_LEN - io_geom->stripe_offset;
return U64_MAX;
}

@@ -6559,9 +6559,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,

map_offset = logical - map->start;
io_geom.raid56_full_stripe_start = (u64)-1;
- max_len = btrfs_max_io_len(map, op, map_offset, &io_geom.stripe_nr,
- &io_geom.stripe_offset,
- &io_geom.raid56_full_stripe_start);
+ max_len = btrfs_max_io_len(map, op, map_offset, &io_geom);
*length = min_t(u64, map->chunk_len - map_offset, max_len);

down_read(&dev_replace->rwsem);

--
2.43.0

2023-12-12 12:39:36

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 12/13] btrfs: pass struct btrfs_io_geometry to set_io_stripe

Instead of passing three members of 'struct btrfs_io_geometry' into
set_io_stripe() pass a pointer to the whole structure and then get the needed
members out of btrfs_io_geometry.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7df991a81c4b..c1fefe34a194 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6326,17 +6326,19 @@ static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, enum btrfs_map_op op,

static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length, struct btrfs_io_stripe *dst,
- struct btrfs_chunk_map *map, u32 stripe_index,
- u64 stripe_offset, u64 stripe_nr)
+ struct btrfs_chunk_map *map,
+ struct btrfs_io_geometry *io_geom)
{
- dst->dev = map->stripes[stripe_index].dev;
+ dst->dev = map->stripes[io_geom->stripe_index].dev;

if (op == BTRFS_MAP_READ && btrfs_need_stripe_tree_update(fs_info, map->type))
return btrfs_get_raid_extent_offset(fs_info, logical, length,
- map->type, stripe_index, dst);
+ map->type,
+ io_geom->stripe_index, dst);

- dst->physical = map->stripes[stripe_index].physical +
- stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
+ dst->physical = map->stripes[io_geom->stripe_index].physical +
+ io_geom->stripe_offset +
+ btrfs_stripe_nr_to_offset(io_geom->stripe_nr);
return 0;
}

@@ -6634,8 +6636,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op,
io_geom.mirror_num)) {
ret = set_io_stripe(fs_info, op, logical, length, smap, map,
- io_geom.stripe_index, io_geom.stripe_offset,
- io_geom.stripe_nr);
+ &io_geom);
if (mirror_num_ret)
*mirror_num_ret = io_geom.mirror_num;
*bioc_ret = NULL;
@@ -6688,10 +6689,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*/
for (i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, op, logical, length,
- &bioc->stripes[i], map,
- io_geom.stripe_index,
- io_geom.stripe_offset,
- io_geom.stripe_nr);
+ &bioc->stripes[i], map, &io_geom);
if (ret < 0)
break;
io_geom.stripe_index++;

--
2.43.0

2023-12-12 12:39:40

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH 10/13] btrfs: untagle if else maze in btrfs_map_block

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/volumes.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e7bd3a25c516..946333c8c331 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6574,26 +6574,38 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom.num_stripes = 1;
io_geom.stripe_index = 0;
io_geom.mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
- if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+
+ switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case BTRFS_BLOCK_GROUP_RAID0:
map_blocks_for_raid0(map, op, &io_geom);
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
+ break;
+ case BTRFS_BLOCK_GROUP_RAID1:
+ case BTRFS_BLOCK_GROUP_RAID1C3:
+ case BTRFS_BLOCK_GROUP_RAID1C4:
map_blocks_for_raid1(fs_info, map, op, &io_geom,
dev_replace_is_ongoing);
- } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+ break;
+ case BTRFS_BLOCK_GROUP_DUP:
map_blocks_for_dup(map, op, &io_geom);
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ break;
+ case BTRFS_BLOCK_GROUP_RAID10:
map_blocks_for_raid10(fs_info, map, op, &io_geom,
dev_replace_is_ongoing);
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ break;
+ case BTRFS_BLOCK_GROUP_RAID5:
+ case BTRFS_BLOCK_GROUP_RAID6:
map_blocks_for_raid56(map, op, &io_geom, logical, length);
- } else {
+ break;
+ default:
/*
* After this, stripe_nr is the number of stripes on this
* device we have to walk to find the data, and stripe_index is
* the number of our device in the stripe array
*/
map_blocks_for_single(map, &io_geom);
+ break;
}
+
if (io_geom.stripe_index >= map->num_stripes) {
btrfs_crit(fs_info,
"stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",

--
2.43.0

2023-12-13 08:49:52

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 01/13] btrfs: factor out helper for single device IO check

On Tue, Dec 12, 2023 at 04:37:59AM -0800, Johannes Thumshirn wrote:
> The check in btrfs_map_block() deciding if a particular I/O is targeting a
> single device is getting more and more convoluted.
>
> Factor out the check conditions into a helper function, with no functional
> change otherwise.

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-12-13 08:51:09

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 03/13] btrfs: factor out block-mapping for RAID0

> +static void map_blocks_for_raid0(struct btrfs_chunk_map *map,

I'd skip the _for here as it is a bit redundant.

> + enum btrfs_map_op op,

Looking at all the patches: shouldn't the op also go into the
btrfs_io_geometry structure?

2023-12-13 08:52:23

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 04/13] btrfs: factor out RAID1 block mapping

On Tue, Dec 12, 2023 at 04:38:02AM -0800, Johannes Thumshirn wrote:
> Now that we have a container for the I/O geometry that has all the needed
> information for the block mappings of RAID1, factor out a helper calculating
> this information.
>
> Signed-off-by: Johannes Thumshirn <[email protected]>
> ---
> fs/btrfs/volumes.c | 31 +++++++++++++++++++++----------
> 1 file changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index a5d85a77da25..f6f1e783b3c1 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6372,6 +6372,25 @@ static void map_blocks_for_raid0(struct btrfs_chunk_map *map,
> io_geom->mirror_num = 1;
> }
>
> +static void map_blocks_for_raid1(struct btrfs_fs_info *fs_info,
> + struct btrfs_chunk_map *map,
> + enum btrfs_map_op op,
> + struct btrfs_io_geometry *io_geom, int replace)

replace looks like a bool to me. Also elsewhere in the code it is
called dev_replace_is_ongoing. Even if that name is a little clumsy
it's nice to not switch forth and back between names in a call chain.

2023-12-13 08:53:16

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 08/13] btrfs: factor out block mapping for RAID5/6

> +static void map_blocks_for_raid56(struct btrfs_chunk_map *map,
> + enum btrfs_map_op op,
> + struct btrfs_io_geometry *io_geom,
> + u64 logical, u64 *length)
> +{
> + int data_stripes = nr_data_stripes(map);
> +
> + if (op != BTRFS_MAP_READ || io_geom->mirror_num > 1) {

Any reason to not have separate read/write helpers here given that
they don't really share anything?

2023-12-13 08:53:51

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 10/13] btrfs: untagle if else maze in btrfs_map_block

Might be worth mentioning that you're using a switch statement :)

Otherwise looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-12-13 08:58:52

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

On Tue, Dec 12, 2023 at 04:38:09AM -0800, Johannes Thumshirn wrote:
> Open code set_io_stripe() for RAID56, as it a) uses a different method to
> calculate the stripe_index and b) doesn't need to go through raid-stripe-tree
> mapping code.

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

I think raid stripe tree handling also really should move out of
set_io_stripe. Below is the latest I have, although it probably won't
apply to your tree:

---
From ac208da48d7f9d11eef8a01ac0c6fbf9681665b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <[email protected]>
Date: Thu, 22 Jun 2023 05:53:13 +0200
Subject: btrfs: move raid-stripe-tree handling out of set_io_stripe

set_io_stripe gets a little too complicated with the raid-stripe-tree
handling. Move it out into the only callers that actually needs it.

The only reads with more than a single stripe is the parity raid recovery
case thast will need very special handling anyway once implemented.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/btrfs/volumes.c | 61 ++++++++++++++++++++--------------------------
1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 30ee5d1670d034..e32eefa242b0a4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6233,22 +6233,12 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
return U64_MAX;
}

-static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length, struct btrfs_io_stripe *dst,
- struct map_lookup *map, u32 stripe_index,
- u64 stripe_offset, u64 stripe_nr)
+static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
+ u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
{
dst->dev = map->stripes[stripe_index].dev;
-
- if (op == BTRFS_MAP_READ &&
- btrfs_use_raid_stripe_tree(fs_info, map->type))
- return btrfs_get_raid_extent_offset(fs_info, logical, length,
- map->type, stripe_index,
- dst);
-
dst->physical = map->stripes[stripe_index].physical +
stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
- return 0;
}

int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
@@ -6423,15 +6413,24 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* physical block information on the stack instead of allocating an
* I/O context structure.
*/
- if (smap && num_alloc_stripes == 1 &&
- !(btrfs_use_raid_stripe_tree(fs_info, map->type) &&
- op != BTRFS_MAP_READ) &&
- !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
- ret = set_io_stripe(fs_info, op, logical, length, smap, map,
- stripe_index, stripe_offset, stripe_nr);
- *mirror_num_ret = mirror_num;
- *bioc_ret = NULL;
- goto out;
+ if (smap && num_alloc_stripes == 1) {
+ if (op == BTRFS_MAP_READ &&
+ btrfs_use_raid_stripe_tree(fs_info, map->type)) {
+ ret = btrfs_get_raid_extent_offset(fs_info, logical,
+ length, map->type,
+ stripe_index, smap);
+ *mirror_num_ret = mirror_num;
+ *bioc_ret = NULL;
+ goto out;
+ } else if (!(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ||
+ mirror_num == 0) {
+ set_io_stripe(smap, map, stripe_index, stripe_offset,
+ stripe_nr);
+ *mirror_num_ret = mirror_num;
+ *bioc_ret = NULL;
+ ret = 0;
+ goto out;
+ }
}

bioc = alloc_btrfs_io_context(fs_info, logical, num_alloc_stripes);
@@ -6448,6 +6447,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*
* It's still mostly the same as other profiles, just with extra rotation.
*/
+ ASSERT(op != BTRFS_MAP_READ ||
+ btrfs_use_raid_stripe_tree(fs_info, map->type));
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
(op != BTRFS_MAP_READ || mirror_num > 1)) {
/*
@@ -6461,29 +6462,21 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
bioc->full_stripe_logical = em->start +
((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
for (i = 0; i < num_stripes; i++)
- ret = set_io_stripe(fs_info, op, logical, length,
- &bioc->stripes[i], map,
- (i + stripe_nr) % num_stripes,
- stripe_offset, stripe_nr);
+ set_io_stripe(&bioc->stripes[i], map,
+ (i + stripe_nr) % num_stripes,
+ stripe_offset, stripe_nr);
} else {
/*
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
for (i = 0; i < num_stripes; i++) {
- ret = set_io_stripe(fs_info, op, logical, length,
- &bioc->stripes[i], map, stripe_index,
- stripe_offset, stripe_nr);
+ set_io_stripe(&bioc->stripes[i], map, stripe_index,
+ stripe_offset, stripe_nr);
stripe_index++;
}
}

- if (ret) {
- *bioc_ret = NULL;
- btrfs_put_bioc(bioc);
- goto out;
- }
-
if (op != BTRFS_MAP_READ)
max_errors = btrfs_chunk_max_errors(map);

--
2.39.2

2023-12-13 09:02:30

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH 03/13] btrfs: factor out block-mapping for RAID0

On 13.12.23 09:51, Christoph Hellwig wrote:
>> +static void map_blocks_for_raid0(struct btrfs_chunk_map *map,
>
> I'd skip the _for here as it is a bit redundant.

OK.

>> + enum btrfs_map_op op,
>
> Looking at all the patches: shouldn't the op also go into the
> btrfs_io_geometry structure?

Would make sense yes.

2023-12-13 09:04:18

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH 08/13] btrfs: factor out block mapping for RAID5/6

On 13.12.23 09:53, Christoph Hellwig wrote:
>> +static void map_blocks_for_raid56(struct btrfs_chunk_map *map,
>> + enum btrfs_map_op op,
>> + struct btrfs_io_geometry *io_geom,
>> + u64 logical, u64 *length)
>> +{
>> + int data_stripes = nr_data_stripes(map);
>> +
>> + if (op != BTRFS_MAP_READ || io_geom->mirror_num > 1) {
>
> Any reason to not have separate read/write helpers here given that
> they don't really share anything?
>

Nope, can do sure.

2023-12-13 09:10:03

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

On 13.12.23 09:58, Christoph Hellwig wrote:
> On Tue, Dec 12, 2023 at 04:38:09AM -0800, Johannes Thumshirn wrote:
>> Open code set_io_stripe() for RAID56, as it a) uses a different method to
>> calculate the stripe_index and b) doesn't need to go through raid-stripe-tree
>> mapping code.
>
> Looks good:
>
> Reviewed-by: Christoph Hellwig <[email protected]>
>
> I think raid stripe tree handling also really should move out of
> set_io_stripe. Below is the latest I have, although it probably won't
> apply to your tree:
>

That would work as well and replace patch 1 then. Let me think about it.

2023-12-13 09:17:29

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

On Wed, Dec 13, 2023 at 09:09:47AM +0000, Johannes Thumshirn wrote:
> > I think raid stripe tree handling also really should move out of
> > set_io_stripe. Below is the latest I have, although it probably won't
> > apply to your tree:
> >
>
> That would work as well and replace patch 1 then. Let me think about it.

I actually really like splitting that check into a helper for
documentation purposes. Btw, this my full tree that the patch is from
in case it is useful:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/raid-stripe-tree-cleanups

2023-12-13 09:23:49

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

On 13.12.23 10:17, [email protected] wrote:
> On Wed, Dec 13, 2023 at 09:09:47AM +0000, Johannes Thumshirn wrote:
>>> I think raid stripe tree handling also really should move out of
>>> set_io_stripe. Below is the latest I have, although it probably won't
>>> apply to your tree:
>>>
>>
>> That would work as well and replace patch 1 then. Let me think about it.
>
> I actually really like splitting that check into a helper for
> documentation purposes. Btw, this my full tree that the patch is from
> in case it is useful:
>
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/raid-stripe-tree-cleanups
>

Cool thanks, I'll have a look :)

2023-12-13 15:36:54

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH 11/13] btrfs: open code set_io_stripe for RAID56

On 13.12.23 09:58, Christoph Hellwig wrote:
>
> I think raid stripe tree handling also really should move out of
> set_io_stripe. Below is the latest I have, although it probably won't
> apply to your tree:

I've decided to add that one afterwards giving the attribution to you.
There are some other patches in your tree as well, which I want to have
a look at too.

> ---
> From ac208da48d7f9d11eef8a01ac0c6fbf9681665b5 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <[email protected]>
> Date: Thu, 22 Jun 2023 05:53:13 +0200
> Subject: btrfs: move raid-stripe-tree handling out of set_io_stripe
>
> set_io_stripe gets a little too complicated with the raid-stripe-tree
> handling. Move it out into the only callers that actually needs it.
>
> The only reads with more than a single stripe is the parity raid recovery
> case thast will need very special handling anyway once implemented.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/btrfs/volumes.c | 61 ++++++++++++++++++++--------------------------
> 1 file changed, 27 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 30ee5d1670d034..e32eefa242b0a4 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6233,22 +6233,12 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
> return U64_MAX;
> }
>
> -static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> - u64 logical, u64 *length, struct btrfs_io_stripe *dst,
> - struct map_lookup *map, u32 stripe_index,
> - u64 stripe_offset, u64 stripe_nr)
> +static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
> + u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
> {
> dst->dev = map->stripes[stripe_index].dev;
> -
> - if (op == BTRFS_MAP_READ &&
> - btrfs_use_raid_stripe_tree(fs_info, map->type))
> - return btrfs_get_raid_extent_offset(fs_info, logical, length,
> - map->type, stripe_index,
> - dst);
> -
> dst->physical = map->stripes[stripe_index].physical +
> stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
> - return 0;
> }
>
> int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> @@ -6423,15 +6413,24 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> * physical block information on the stack instead of allocating an
> * I/O context structure.
> */
> - if (smap && num_alloc_stripes == 1 &&
> - !(btrfs_use_raid_stripe_tree(fs_info, map->type) &&
> - op != BTRFS_MAP_READ) &&
> - !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
> - ret = set_io_stripe(fs_info, op, logical, length, smap, map,
> - stripe_index, stripe_offset, stripe_nr);
> - *mirror_num_ret = mirror_num;
> - *bioc_ret = NULL;
> - goto out;
> + if (smap && num_alloc_stripes == 1) {
> + if (op == BTRFS_MAP_READ &&
> + btrfs_use_raid_stripe_tree(fs_info, map->type)) {
> + ret = btrfs_get_raid_extent_offset(fs_info, logical,
> + length, map->type,
> + stripe_index, smap);
> + *mirror_num_ret = mirror_num;
> + *bioc_ret = NULL;
> + goto out;
> + } else if (!(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ||
> + mirror_num == 0) {
> + set_io_stripe(smap, map, stripe_index, stripe_offset,
> + stripe_nr);
> + *mirror_num_ret = mirror_num;
> + *bioc_ret = NULL;
> + ret = 0;
> + goto out;
> + }
> }
>
> bioc = alloc_btrfs_io_context(fs_info, logical, num_alloc_stripes);
> @@ -6448,6 +6447,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> *
> * It's still mostly the same as other profiles, just with extra rotation.
> */
> + ASSERT(op != BTRFS_MAP_READ ||
> + btrfs_use_raid_stripe_tree(fs_info, map->type));
> if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
> (op != BTRFS_MAP_READ || mirror_num > 1)) {
> /*
> @@ -6461,29 +6462,21 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> bioc->full_stripe_logical = em->start +
> ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
> for (i = 0; i < num_stripes; i++)
> - ret = set_io_stripe(fs_info, op, logical, length,
> - &bioc->stripes[i], map,
> - (i + stripe_nr) % num_stripes,
> - stripe_offset, stripe_nr);
> + set_io_stripe(&bioc->stripes[i], map,
> + (i + stripe_nr) % num_stripes,
> + stripe_offset, stripe_nr);
> } else {
> /*
> * For all other non-RAID56 profiles, just copy the target
> * stripe into the bioc.
> */
> for (i = 0; i < num_stripes; i++) {
> - ret = set_io_stripe(fs_info, op, logical, length,
> - &bioc->stripes[i], map, stripe_index,
> - stripe_offset, stripe_nr);
> + set_io_stripe(&bioc->stripes[i], map, stripe_index,
> + stripe_offset, stripe_nr);
> stripe_index++;
> }
> }
>
> - if (ret) {
> - *bioc_ret = NULL;
> - btrfs_put_bioc(bioc);
> - goto out;
> - }
> -
> if (op != BTRFS_MAP_READ)
> max_errors = btrfs_chunk_max_errors(map);
>