2021-01-27 23:59:16

by Michal Rostecki

[permalink] [raw]
Subject: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

From: Michal Rostecki <[email protected]>

Before this change, the btrfs_get_io_geometry() function was calling
btrfs_get_chunk_map() to get the extent mapping, necessary for
calculating the I/O geometry. It was using that extent mapping only
internally and freeing the pointer after its execution.

That resulted in calling btrfs_get_chunk_map() de facto twice by the
__btrfs_map_block() function. It was calling btrfs_get_io_geometry()
first and then calling btrfs_get_chunk_map() directly to get the extent
mapping, used by the rest of the function.

This change fixes that by passing the extent mapping to the
btrfs_get_io_geometry() function as an argument.

v2:
When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
- Use errno_to_blk_status(PTR_ERR(em)) as the status
- Set em to NULL

Signed-off-by: Michal Rostecki <[email protected]>
---
fs/btrfs/inode.c | 38 +++++++++++++++++++++++++++++---------
fs/btrfs/volumes.c | 39 ++++++++++++++++-----------------------
fs/btrfs/volumes.h | 5 +++--
3 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0dbe1aaa0b71..e2ee3a9c1140 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2183,9 +2183,10 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 logical = bio->bi_iter.bi_sector << 9;
+ struct extent_map *em;
u64 length = 0;
u64 map_length;
- int ret;
+ int ret = 0;
struct btrfs_io_geometry geom;

if (bio_flags & EXTENT_BIO_COMPRESSED)
@@ -2193,14 +2194,21 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,

length = bio->bi_iter.bi_size;
map_length = length;
- ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
- &geom);
+ em = btrfs_get_chunk_map(fs_info, logical, map_length);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+ ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
+ map_length, &geom);
if (ret < 0)
- return ret;
+ goto out;

- if (geom.len < length + size)
- return 1;
- return 0;
+ if (geom.len < length + size) {
+ ret = 1;
+ goto out;
+ }
+out:
+ free_extent_map(em);
+ return ret;
}

/*
@@ -7941,10 +7949,12 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
u64 submit_len;
int clone_offset = 0;
int clone_len;
+ int logical;
int ret;
blk_status_t status;
struct btrfs_io_geometry geom;
struct btrfs_dio_data *dio_data = iomap->private;
+ struct extent_map *em;

dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
if (!dip) {
@@ -7970,11 +7980,18 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
}

start_sector = dio_bio->bi_iter.bi_sector;
+ logical = start_sector << 9;
submit_len = dio_bio->bi_iter.bi_size;

do {
- ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
- start_sector << 9, submit_len,
+ em = btrfs_get_chunk_map(fs_info, logical, submit_len);
+ if (IS_ERR(em)) {
+ status = errno_to_blk_status(PTR_ERR(em));
+ em = NULL;
+ goto out_err;
+ }
+ ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
+ logical, submit_len,
&geom);
if (ret) {
status = errno_to_blk_status(ret);
@@ -8030,12 +8047,15 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
clone_offset += clone_len;
start_sector += clone_len >> 9;
file_offset += clone_len;
+
+ free_extent_map(em);
} while (submit_len > 0);
return BLK_QC_T_NONE;

out_err:
dip->dio_bio->bi_status = status;
btrfs_dio_private_put(dip);
+ free_extent_map(em);
return BLK_QC_T_NONE;
}

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a8ec8539cd8d..4c753b17c0a2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5940,23 +5940,24 @@ static bool need_full_stripe(enum btrfs_map_op op)
}

/*
- * btrfs_get_io_geometry - calculates the geomery of a particular (address, len)
+ * btrfs_get_io_geometry - calculates the geometry of a particular (address, len)
* tuple. This information is used to calculate how big a
* particular bio can get before it straddles a stripe.
*
- * @fs_info - the filesystem
- * @logical - address that we want to figure out the geometry of
- * @len - the length of IO we are going to perform, starting at @logical
- * @op - type of operation - write or read
- * @io_geom - pointer used to return values
+ * @fs_info: the filesystem
+ * @em: mapping containing the logical extent
+ * @op: type of operation - write or read
+ * @logical: address that we want to figure out the geometry of
+ * @len: the length of IO we are going to perform, starting at @logical
+ * @io_geom: pointer used to return values
*
* Returns < 0 in case a chunk for the given logical address cannot be found,
* usually shouldn't happen unless @logical is corrupted, 0 otherwise.
*/
-int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
+int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
+ enum btrfs_map_op op, u64 logical, u64 len,
+ struct btrfs_io_geometry *io_geom)
{
- struct extent_map *em;
struct map_lookup *map;
u64 offset;
u64 stripe_offset;
@@ -5964,14 +5965,9 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 stripe_len;
u64 raid56_full_stripe_start = (u64)-1;
int data_stripes;
- int ret = 0;

ASSERT(op != BTRFS_MAP_DISCARD);

- em = btrfs_get_chunk_map(fs_info, logical, len);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
map = em->map_lookup;
/* Offset of this logical address in the chunk */
offset = logical - em->start;
@@ -5985,8 +5981,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
stripe_offset, offset, em->start, logical, stripe_len);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}

/* stripe_offset is the offset of this block in its stripe */
@@ -6033,10 +6028,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom->stripe_offset = stripe_offset;
io_geom->raid56_stripe_offset = raid56_full_stripe_start;

-out:
- /* once for us */
- free_extent_map(em);
- return ret;
+ return 0;
}

static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
@@ -6069,12 +6061,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
ASSERT(bbio_ret);
ASSERT(op != BTRFS_MAP_DISCARD);

- ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
+ em = btrfs_get_chunk_map(fs_info, logical, *length);
+ ASSERT(!IS_ERR(em));
+
+ ret = btrfs_get_io_geometry(fs_info, em, op, logical, *length, &geom);
if (ret < 0)
return ret;

- em = btrfs_get_chunk_map(fs_info, logical, *length);
- ASSERT(!IS_ERR(em));
map = em->map_lookup;

*length = geom.len;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index c43663d9c22e..04e2b26823c2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -440,8 +440,9 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
-int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
+int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
+ enum btrfs_map_op op, u64 logical, u64 len,
+ struct btrfs_io_geometry *io_geom);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
--
2.30.0


2021-01-28 00:01:35

by Nikolay Borisov

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice



On 27.01.21 г. 15:57 ч., Michal Rostecki wrote:
> From: Michal Rostecki <[email protected]>
>
> Before this change, the btrfs_get_io_geometry() function was calling
> btrfs_get_chunk_map() to get the extent mapping, necessary for
> calculating the I/O geometry. It was using that extent mapping only
> internally and freeing the pointer after its execution.
>
> That resulted in calling btrfs_get_chunk_map() de facto twice by the
> __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> first and then calling btrfs_get_chunk_map() directly to get the extent
> mapping, used by the rest of the function.
>
> This change fixes that by passing the extent mapping to the
> btrfs_get_io_geometry() function as an argument.
>
> v2:
> When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> - Use errno_to_blk_status(PTR_ERR(em)) as the status
> - Set em to NULL
>
> Signed-off-by: Michal Rostecki <[email protected]>
> ---
> fs/btrfs/inode.c | 38 +++++++++++++++++++++++++++++---------
> fs/btrfs/volumes.c | 39 ++++++++++++++++-----------------------
> fs/btrfs/volumes.h | 5 +++--
> 3 files changed, 48 insertions(+), 34 deletions(-)

So this adds more code but for what benefit? In your reply to Filipe you
said you didn't observe this being a performance-affecting change so

>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 0dbe1aaa0b71..e2ee3a9c1140 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2183,9 +2183,10 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
> struct inode *inode = page->mapping->host;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> u64 logical = bio->bi_iter.bi_sector << 9;
> + struct extent_map *em;
> u64 length = 0;
> u64 map_length;
> - int ret;
> + int ret = 0;
> struct btrfs_io_geometry geom;
>
> if (bio_flags & EXTENT_BIO_COMPRESSED)
> @@ -2193,14 +2194,21 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
>
> length = bio->bi_iter.bi_size;
> map_length = length;
> - ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
> - &geom);
> + em = btrfs_get_chunk_map(fs_info, logical, map_length);
> + if (IS_ERR(em))
> + return PTR_ERR(em);
> + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
> + map_length, &geom);
> if (ret < 0)
> - return ret;
> + goto out;
>
> - if (geom.len < length + size)
> - return 1;
> - return 0;
> + if (geom.len < length + size) {
> + ret = 1;
> + goto out;
> + }

this could be simply

if (geom.len <length + size)
ret = 1;

Not need for the extra 'goto out'

> +out:
> + free_extent_map(em);
> + return ret;
> }
>
> /*
> @@ -7941,10 +7949,12 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> u64 submit_len;
> int clone_offset = 0;
> int clone_len;
> + int logical;
> int ret;
> blk_status_t status;
> struct btrfs_io_geometry geom;
> struct btrfs_dio_data *dio_data = iomap->private;
> + struct extent_map *em;
>
> dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
> if (!dip) {
> @@ -7970,11 +7980,18 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> }
>
> start_sector = dio_bio->bi_iter.bi_sector;
> + logical = start_sector << 9;
> submit_len = dio_bio->bi_iter.bi_size;
>
> do {
> - ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
> - start_sector << 9, submit_len,
> + em = btrfs_get_chunk_map(fs_info, logical, submit_len);
> + if (IS_ERR(em)) {
> + status = errno_to_blk_status(PTR_ERR(em));
> + em = NULL;
> + goto out_err;
> + }
> + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
> + logical, submit_len,
> &geom);
> if (ret) {
> status = errno_to_blk_status(ret);
> @@ -8030,12 +8047,15 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> clone_offset += clone_len;
> start_sector += clone_len >> 9;
> file_offset += clone_len;
> +
> + free_extent_map(em);
> } while (submit_len > 0);
> return BLK_QC_T_NONE;
>
> out_err:
> dip->dio_bio->bi_status = status;
> btrfs_dio_private_put(dip);
> + free_extent_map(em);
> return BLK_QC_T_NONE;
> }

For example in this function you increase complexity by having to deal
with free_extent_map as well so I'm not sure this is a net-win.

>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index a8ec8539cd8d..4c753b17c0a2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5940,23 +5940,24 @@ static bool need_full_stripe(enum btrfs_map_op op)
> }
>
> /*
> - * btrfs_get_io_geometry - calculates the geomery of a particular (address, len)
> + * btrfs_get_io_geometry - calculates the geometry of a particular (address, len)
> * tuple. This information is used to calculate how big a
> * particular bio can get before it straddles a stripe.
> *
> - * @fs_info - the filesystem
> - * @logical - address that we want to figure out the geometry of
> - * @len - the length of IO we are going to perform, starting at @logical
> - * @op - type of operation - write or read
> - * @io_geom - pointer used to return values
> + * @fs_info: the filesystem
> + * @em: mapping containing the logical extent
> + * @op: type of operation - write or read
> + * @logical: address that we want to figure out the geometry of
> + * @len: the length of IO we are going to perform, starting at @logical
> + * @io_geom: pointer used to return values
> *
> * Returns < 0 in case a chunk for the given logical address cannot be found,
> * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
> */
> -int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> - u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
> +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
> + enum btrfs_map_op op, u64 logical, u64 len,
> + struct btrfs_io_geometry *io_geom)
> {
> - struct extent_map *em;
> struct map_lookup *map;
> u64 offset;
> u64 stripe_offset;
> @@ -5964,14 +5965,9 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> u64 stripe_len;
> u64 raid56_full_stripe_start = (u64)-1;
> int data_stripes;
> - int ret = 0;
>
> ASSERT(op != BTRFS_MAP_DISCARD);
>
> - em = btrfs_get_chunk_map(fs_info, logical, len);
> - if (IS_ERR(em))
> - return PTR_ERR(em);
> -
> map = em->map_lookup;
> /* Offset of this logical address in the chunk */
> offset = logical - em->start;
> @@ -5985,8 +5981,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> btrfs_crit(fs_info,
> "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
> stripe_offset, offset, em->start, logical, stripe_len);
> - ret = -EINVAL;
> - goto out;
> + return -EINVAL;
> }
>
> /* stripe_offset is the offset of this block in its stripe */
> @@ -6033,10 +6028,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> io_geom->stripe_offset = stripe_offset;
> io_geom->raid56_stripe_offset = raid56_full_stripe_start;
>
> -out:
> - /* once for us */
> - free_extent_map(em);
> - return ret;
> + return 0;
> }

Effectively, what's going on is you are pulling complexity from
btrfs_get_io_geometry and putting it into its 2 callers which is IMO bad.

So unless you can demonstrate this is indeed affecting performance I'd
be inclined to NAK it.

2021-01-28 10:42:11

by Filipe Manana

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Thu, Jan 28, 2021 at 12:01 AM Michal Rostecki <[email protected]> wrote:
>
> From: Michal Rostecki <[email protected]>
>
> Before this change, the btrfs_get_io_geometry() function was calling
> btrfs_get_chunk_map() to get the extent mapping, necessary for
> calculating the I/O geometry. It was using that extent mapping only
> internally and freeing the pointer after its execution.
>
> That resulted in calling btrfs_get_chunk_map() de facto twice by the
> __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> first and then calling btrfs_get_chunk_map() directly to get the extent
> mapping, used by the rest of the function.
>
> This change fixes that by passing the extent mapping to the
> btrfs_get_io_geometry() function as an argument.
>
> v2:
> When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> - Use errno_to_blk_status(PTR_ERR(em)) as the status
> - Set em to NULL
>
> Signed-off-by: Michal Rostecki <[email protected]>

Reviewed-by: Filipe Manana <[email protected]>

I think this is a fine optimization.
For very large filesystems, i.e. several thousands of allocated
chunks, not only this avoids searching two times the rbtree,
saving time, it may also help reducing contention on the lock that
protects the tree - thinking of writeback starting for multiple
inodes,
other tasks allocating or removing chunks, and anything else that
requires access to the rbtree.

Thanks, it looks good to me now.

> ---
> fs/btrfs/inode.c | 38 +++++++++++++++++++++++++++++---------
> fs/btrfs/volumes.c | 39 ++++++++++++++++-----------------------
> fs/btrfs/volumes.h | 5 +++--
> 3 files changed, 48 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 0dbe1aaa0b71..e2ee3a9c1140 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2183,9 +2183,10 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
> struct inode *inode = page->mapping->host;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> u64 logical = bio->bi_iter.bi_sector << 9;
> + struct extent_map *em;
> u64 length = 0;
> u64 map_length;
> - int ret;
> + int ret = 0;
> struct btrfs_io_geometry geom;
>
> if (bio_flags & EXTENT_BIO_COMPRESSED)
> @@ -2193,14 +2194,21 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
>
> length = bio->bi_iter.bi_size;
> map_length = length;
> - ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
> - &geom);
> + em = btrfs_get_chunk_map(fs_info, logical, map_length);
> + if (IS_ERR(em))
> + return PTR_ERR(em);
> + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
> + map_length, &geom);
> if (ret < 0)
> - return ret;
> + goto out;
>
> - if (geom.len < length + size)
> - return 1;
> - return 0;
> + if (geom.len < length + size) {
> + ret = 1;
> + goto out;
> + }
> +out:
> + free_extent_map(em);
> + return ret;
> }
>
> /*
> @@ -7941,10 +7949,12 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> u64 submit_len;
> int clone_offset = 0;
> int clone_len;
> + int logical;
> int ret;
> blk_status_t status;
> struct btrfs_io_geometry geom;
> struct btrfs_dio_data *dio_data = iomap->private;
> + struct extent_map *em;
>
> dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
> if (!dip) {
> @@ -7970,11 +7980,18 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> }
>
> start_sector = dio_bio->bi_iter.bi_sector;
> + logical = start_sector << 9;
> submit_len = dio_bio->bi_iter.bi_size;
>
> do {
> - ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
> - start_sector << 9, submit_len,
> + em = btrfs_get_chunk_map(fs_info, logical, submit_len);
> + if (IS_ERR(em)) {
> + status = errno_to_blk_status(PTR_ERR(em));
> + em = NULL;
> + goto out_err;
> + }
> + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
> + logical, submit_len,
> &geom);
> if (ret) {
> status = errno_to_blk_status(ret);
> @@ -8030,12 +8047,15 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> clone_offset += clone_len;
> start_sector += clone_len >> 9;
> file_offset += clone_len;
> +
> + free_extent_map(em);
> } while (submit_len > 0);
> return BLK_QC_T_NONE;
>
> out_err:
> dip->dio_bio->bi_status = status;
> btrfs_dio_private_put(dip);
> + free_extent_map(em);
> return BLK_QC_T_NONE;
> }
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index a8ec8539cd8d..4c753b17c0a2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5940,23 +5940,24 @@ static bool need_full_stripe(enum btrfs_map_op op)
> }
>
> /*
> - * btrfs_get_io_geometry - calculates the geomery of a particular (address, len)
> + * btrfs_get_io_geometry - calculates the geometry of a particular (address, len)
> * tuple. This information is used to calculate how big a
> * particular bio can get before it straddles a stripe.
> *
> - * @fs_info - the filesystem
> - * @logical - address that we want to figure out the geometry of
> - * @len - the length of IO we are going to perform, starting at @logical
> - * @op - type of operation - write or read
> - * @io_geom - pointer used to return values
> + * @fs_info: the filesystem
> + * @em: mapping containing the logical extent
> + * @op: type of operation - write or read
> + * @logical: address that we want to figure out the geometry of
> + * @len: the length of IO we are going to perform, starting at @logical
> + * @io_geom: pointer used to return values
> *
> * Returns < 0 in case a chunk for the given logical address cannot be found,
> * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
> */
> -int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> - u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
> +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
> + enum btrfs_map_op op, u64 logical, u64 len,
> + struct btrfs_io_geometry *io_geom)
> {
> - struct extent_map *em;
> struct map_lookup *map;
> u64 offset;
> u64 stripe_offset;
> @@ -5964,14 +5965,9 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> u64 stripe_len;
> u64 raid56_full_stripe_start = (u64)-1;
> int data_stripes;
> - int ret = 0;
>
> ASSERT(op != BTRFS_MAP_DISCARD);
>
> - em = btrfs_get_chunk_map(fs_info, logical, len);
> - if (IS_ERR(em))
> - return PTR_ERR(em);
> -
> map = em->map_lookup;
> /* Offset of this logical address in the chunk */
> offset = logical - em->start;
> @@ -5985,8 +5981,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> btrfs_crit(fs_info,
> "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
> stripe_offset, offset, em->start, logical, stripe_len);
> - ret = -EINVAL;
> - goto out;
> + return -EINVAL;
> }
>
> /* stripe_offset is the offset of this block in its stripe */
> @@ -6033,10 +6028,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> io_geom->stripe_offset = stripe_offset;
> io_geom->raid56_stripe_offset = raid56_full_stripe_start;
>
> -out:
> - /* once for us */
> - free_extent_map(em);
> - return ret;
> + return 0;
> }
>
> static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> @@ -6069,12 +6061,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> ASSERT(bbio_ret);
> ASSERT(op != BTRFS_MAP_DISCARD);
>
> - ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
> + em = btrfs_get_chunk_map(fs_info, logical, *length);
> + ASSERT(!IS_ERR(em));
> +
> + ret = btrfs_get_io_geometry(fs_info, em, op, logical, *length, &geom);
> if (ret < 0)
> return ret;
>
> - em = btrfs_get_chunk_map(fs_info, logical, *length);
> - ASSERT(!IS_ERR(em));
> map = em->map_lookup;
>
> *length = geom.len;
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index c43663d9c22e..04e2b26823c2 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -440,8 +440,9 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> u64 logical, u64 *length,
> struct btrfs_bio **bbio_ret);
> -int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> - u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
> +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
> + enum btrfs_map_op op, u64 logical, u64 len,
> + struct btrfs_io_geometry *io_geom);
> int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
> int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
> int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
> --
> 2.30.0
>


--
Filipe David Manana,

“Whether you think you can, or you think you can't — you're right.”

2021-01-28 11:04:28

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Thu, Jan 28, 2021 at 10:38:45AM +0000, Filipe Manana wrote:
> On Thu, Jan 28, 2021 at 12:01 AM Michal Rostecki <[email protected]> wrote:
> >
> > From: Michal Rostecki <[email protected]>
> >
> > Before this change, the btrfs_get_io_geometry() function was calling
> > btrfs_get_chunk_map() to get the extent mapping, necessary for
> > calculating the I/O geometry. It was using that extent mapping only
> > internally and freeing the pointer after its execution.
> >
> > That resulted in calling btrfs_get_chunk_map() de facto twice by the
> > __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> > first and then calling btrfs_get_chunk_map() directly to get the extent
> > mapping, used by the rest of the function.
> >
> > This change fixes that by passing the extent mapping to the
> > btrfs_get_io_geometry() function as an argument.
> >
> > v2:
> > When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> > - Use errno_to_blk_status(PTR_ERR(em)) as the status
> > - Set em to NULL
> >
> > Signed-off-by: Michal Rostecki <[email protected]>
>
> Reviewed-by: Filipe Manana <[email protected]>
>
> I think this is a fine optimization.
> For very large filesystems, i.e. several thousands of allocated
> chunks, not only this avoids searching two times the rbtree,
> saving time, it may also help reducing contention on the lock that
> protects the tree - thinking of writeback starting for multiple
> inodes,
> other tasks allocating or removing chunks, and anything else that
> requires access to the rbtree.

This should answer Nikolay's concerns if shifting around the parameters
is worth it. Caching values that could be expensive to read makes sense
to me and it's not that intrusive in the code. I'll add your analysis to
the changelog and incorporate the fixups that Nikolay suggested in v1.
Thanks.

2021-01-28 11:12:17

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Wed, Jan 27, 2021 at 02:57:27PM +0100, Michal Rostecki wrote:
> From: Michal Rostecki <[email protected]>
>
> Before this change, the btrfs_get_io_geometry() function was calling
> btrfs_get_chunk_map() to get the extent mapping, necessary for
> calculating the I/O geometry. It was using that extent mapping only
> internally and freeing the pointer after its execution.
>
> That resulted in calling btrfs_get_chunk_map() de facto twice by the
> __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> first and then calling btrfs_get_chunk_map() directly to get the extent
> mapping, used by the rest of the function.
>
> This change fixes that by passing the extent mapping to the
> btrfs_get_io_geometry() function as an argument.
>
> v2:
> When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> - Use errno_to_blk_status(PTR_ERR(em)) as the status
> - Set em to NULL

The version-to-version changelog belongs under the -- line. If there's
something relevant in v2 it should be put into the proper changelog but
normal fixups like 'set em to NULL' do not have the long-term value that
we want to record in the changelog.

> Signed-off-by: Michal Rostecki <[email protected]>
> ---
> fs/btrfs/inode.c | 38 +++++++++++++++++++++++++++++---------
> fs/btrfs/volumes.c | 39 ++++++++++++++++-----------------------
> fs/btrfs/volumes.h | 5 +++--
> 3 files changed, 48 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 0dbe1aaa0b71..e2ee3a9c1140 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2183,9 +2183,10 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
> struct inode *inode = page->mapping->host;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> u64 logical = bio->bi_iter.bi_sector << 9;
> + struct extent_map *em;
> u64 length = 0;
> u64 map_length;
> - int ret;
> + int ret = 0;
> struct btrfs_io_geometry geom;
>
> if (bio_flags & EXTENT_BIO_COMPRESSED)
> @@ -2193,14 +2194,21 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
>
> length = bio->bi_iter.bi_size;
> map_length = length;
> - ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
> - &geom);
> + em = btrfs_get_chunk_map(fs_info, logical, map_length);
> + if (IS_ERR(em))
> + return PTR_ERR(em);
> + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
> + map_length, &geom);
> if (ret < 0)
> - return ret;
> + goto out;
>
> - if (geom.len < length + size)
> - return 1;
> - return 0;
> + if (geom.len < length + size) {
> + ret = 1;
> + goto out;
> + }
> +out:
> + free_extent_map(em);
> + return ret;
> }
>
> /*
> @@ -7941,10 +7949,12 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> u64 submit_len;
> int clone_offset = 0;
> int clone_len;
> + int logical;

This needs to be u64

> int ret;
> blk_status_t status;
> struct btrfs_io_geometry geom;
> struct btrfs_dio_data *dio_data = iomap->private;
> + struct extent_map *em;
>
> dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
> if (!dip) {
> @@ -7970,11 +7980,18 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
> }
>
> start_sector = dio_bio->bi_iter.bi_sector;
> + logical = start_sector << 9;

Otherwise this overflows on logical address larger than 2^23 which is 8G.

2021-01-29 16:36:14

by Josef Bacik

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On 1/27/21 8:57 AM, Michal Rostecki wrote:
> From: Michal Rostecki <[email protected]>
>
> Before this change, the btrfs_get_io_geometry() function was calling
> btrfs_get_chunk_map() to get the extent mapping, necessary for
> calculating the I/O geometry. It was using that extent mapping only
> internally and freeing the pointer after its execution.
>
> That resulted in calling btrfs_get_chunk_map() de facto twice by the
> __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> first and then calling btrfs_get_chunk_map() directly to get the extent
> mapping, used by the rest of the function.
>
> This change fixes that by passing the extent mapping to the
> btrfs_get_io_geometry() function as an argument.
>
> v2:
> When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> - Use errno_to_blk_status(PTR_ERR(em)) as the status
> - Set em to NULL
>
> Signed-off-by: Michal Rostecki <[email protected]>

This panic'ed all of my test vms in their overnight xfstests runs, the panic is this

[ 2449.936502] BTRFS critical (device dm-7): mapping failed logical 1113825280
bio len 40960 len 24576
[ 2449.937073] ------------[ cut here ]------------
[ 2449.937329] kernel BUG at fs/btrfs/volumes.c:6450!
[ 2449.937604] invalid opcode: 0000 [#1] SMP NOPTI
[ 2449.937855] CPU: 0 PID: 259045 Comm: kworker/u5:0 Not tainted 5.11.0-rc5+ #122
[ 2449.938252] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
1.13.0-2.fc32 04/01/2014
[ 2449.938713] Workqueue: btrfs-worker-high btrfs_work_helper
[ 2449.939016] RIP: 0010:btrfs_map_bio.cold+0x5a/0x5c
[ 2449.939392] Code: 37 87 ff ff e8 ed d4 8a ff 48 83 c4 18 e9 b5 52 8b ff 49 89
c8 4c 89 fa 4c 89 f1 48 c7 c6 b0 c0 61 8b 48 89 ef e8 11 87 ff ff <0f> 0b 4c 89
e7 e8 42 09 86 ff e9 fd 59 8b ff 49 8b 7a 50 44 89 f2
[ 2449.940402] RSP: 0000:ffff9f24c1637d90 EFLAGS: 00010282
[ 2449.940689] RAX: 0000000000000057 RBX: ffff90c78ff716b8 RCX: 0000000000000000
[ 2449.941080] RDX: ffff90c7fbc27ae0 RSI: ffff90c7fbc19110 RDI: ffff90c7fbc19110
[ 2449.941467] RBP: ffff90c7911d4000 R08: 0000000000000000 R09: 0000000000000000
[ 2449.941853] R10: ffff9f24c1637b48 R11: ffffffff8b9723e8 R12: 0000000000000000
[ 2449.942243] R13: 0000000000000000 R14: 000000000000a000 R15: 000000004263a000
[ 2449.942632] FS: 0000000000000000(0000) GS:ffff90c7fbc00000(0000)
knlGS:0000000000000000
[ 2449.943072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2449.943386] CR2: 00005575163c3080 CR3: 000000010ad6c004 CR4: 0000000000370ef0
[ 2449.943772] Call Trace:
[ 2449.943915] ? lock_release+0x1c3/0x290
[ 2449.944135] run_one_async_done+0x3a/0x60
[ 2449.944360] btrfs_work_helper+0x136/0x520
[ 2449.944588] process_one_work+0x26e/0x570
[ 2449.944812] worker_thread+0x55/0x3c0
[ 2449.945016] ? process_one_work+0x570/0x570
[ 2449.945250] kthread+0x137/0x150
[ 2449.945430] ? __kthread_bind_mask+0x60/0x60
[ 2449.945666] ret_from_fork+0x1f/0x30

it happens when you run btrfs/060. Please make sure to run xfstests against
patches before you submit them upstream. Thanks,

Josef

2021-01-29 17:22:22

by Michal Rostecki

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Fri, Jan 29, 2021 at 11:22:48AM -0500, Josef Bacik wrote:
> On 1/27/21 8:57 AM, Michal Rostecki wrote:
> > From: Michal Rostecki <[email protected]>
> >
> > Before this change, the btrfs_get_io_geometry() function was calling
> > btrfs_get_chunk_map() to get the extent mapping, necessary for
> > calculating the I/O geometry. It was using that extent mapping only
> > internally and freeing the pointer after its execution.
> >
> > That resulted in calling btrfs_get_chunk_map() de facto twice by the
> > __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> > first and then calling btrfs_get_chunk_map() directly to get the extent
> > mapping, used by the rest of the function.
> >
> > This change fixes that by passing the extent mapping to the
> > btrfs_get_io_geometry() function as an argument.
> >
> > v2:
> > When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> > - Use errno_to_blk_status(PTR_ERR(em)) as the status
> > - Set em to NULL
> >
> > Signed-off-by: Michal Rostecki <[email protected]>
>
> This panic'ed all of my test vms in their overnight xfstests runs, the panic is this
>
> [ 2449.936502] BTRFS critical (device dm-7): mapping failed logical
> 1113825280 bio len 40960 len 24576
> [ 2449.937073] ------------[ cut here ]------------
> [ 2449.937329] kernel BUG at fs/btrfs/volumes.c:6450!
> [ 2449.937604] invalid opcode: 0000 [#1] SMP NOPTI
> [ 2449.937855] CPU: 0 PID: 259045 Comm: kworker/u5:0 Not tainted 5.11.0-rc5+ #122
> [ 2449.938252] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> 1.13.0-2.fc32 04/01/2014
> [ 2449.938713] Workqueue: btrfs-worker-high btrfs_work_helper
> [ 2449.939016] RIP: 0010:btrfs_map_bio.cold+0x5a/0x5c
> [ 2449.939392] Code: 37 87 ff ff e8 ed d4 8a ff 48 83 c4 18 e9 b5 52 8b ff
> 49 89 c8 4c 89 fa 4c 89 f1 48 c7 c6 b0 c0 61 8b 48 89 ef e8 11 87 ff ff <0f>
> 0b 4c 89 e7 e8 42 09 86 ff e9 fd 59 8b ff 49 8b 7a 50 44 89 f2
> [ 2449.940402] RSP: 0000:ffff9f24c1637d90 EFLAGS: 00010282
> [ 2449.940689] RAX: 0000000000000057 RBX: ffff90c78ff716b8 RCX: 0000000000000000
> [ 2449.941080] RDX: ffff90c7fbc27ae0 RSI: ffff90c7fbc19110 RDI: ffff90c7fbc19110
> [ 2449.941467] RBP: ffff90c7911d4000 R08: 0000000000000000 R09: 0000000000000000
> [ 2449.941853] R10: ffff9f24c1637b48 R11: ffffffff8b9723e8 R12: 0000000000000000
> [ 2449.942243] R13: 0000000000000000 R14: 000000000000a000 R15: 000000004263a000
> [ 2449.942632] FS: 0000000000000000(0000) GS:ffff90c7fbc00000(0000)
> knlGS:0000000000000000
> [ 2449.943072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 2449.943386] CR2: 00005575163c3080 CR3: 000000010ad6c004 CR4: 0000000000370ef0
> [ 2449.943772] Call Trace:
> [ 2449.943915] ? lock_release+0x1c3/0x290
> [ 2449.944135] run_one_async_done+0x3a/0x60
> [ 2449.944360] btrfs_work_helper+0x136/0x520
> [ 2449.944588] process_one_work+0x26e/0x570
> [ 2449.944812] worker_thread+0x55/0x3c0
> [ 2449.945016] ? process_one_work+0x570/0x570
> [ 2449.945250] kthread+0x137/0x150
> [ 2449.945430] ? __kthread_bind_mask+0x60/0x60
> [ 2449.945666] ret_from_fork+0x1f/0x30
>
> it happens when you run btrfs/060. Please make sure to run xfstests against
> patches before you submit them upstream. Thanks,
>
> Josef

Umm... I ran the xftests against v1 patch and didn't get that panic.
I'll try to reproduce and fix that now. Thanks for the heads up and
sorry!

Thanks,
Michal

2021-01-29 17:53:03

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Fri, Jan 29, 2021 at 05:15:21PM +0000, Michal Rostecki wrote:
> On Fri, Jan 29, 2021 at 11:22:48AM -0500, Josef Bacik wrote:
> > On 1/27/21 8:57 AM, Michal Rostecki wrote:
> > > From: Michal Rostecki <[email protected]>
> > >
> > > Before this change, the btrfs_get_io_geometry() function was calling
> > > btrfs_get_chunk_map() to get the extent mapping, necessary for
> > > calculating the I/O geometry. It was using that extent mapping only
> > > internally and freeing the pointer after its execution.
> > >
> > > That resulted in calling btrfs_get_chunk_map() de facto twice by the
> > > __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> > > first and then calling btrfs_get_chunk_map() directly to get the extent
> > > mapping, used by the rest of the function.
> > >
> > > This change fixes that by passing the extent mapping to the
> > > btrfs_get_io_geometry() function as an argument.
> > >
> > > v2:
> > > When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> > > - Use errno_to_blk_status(PTR_ERR(em)) as the status
> > > - Set em to NULL
> > >
> > > Signed-off-by: Michal Rostecki <[email protected]>
> >
> > This panic'ed all of my test vms in their overnight xfstests runs, the panic is this
> >
> > [ 2449.936502] BTRFS critical (device dm-7): mapping failed logical
> > 1113825280 bio len 40960 len 24576
> > [ 2449.937073] ------------[ cut here ]------------
> > [ 2449.937329] kernel BUG at fs/btrfs/volumes.c:6450!
> > [ 2449.937604] invalid opcode: 0000 [#1] SMP NOPTI
> > [ 2449.937855] CPU: 0 PID: 259045 Comm: kworker/u5:0 Not tainted 5.11.0-rc5+ #122
> > [ 2449.938252] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> > 1.13.0-2.fc32 04/01/2014
> > [ 2449.938713] Workqueue: btrfs-worker-high btrfs_work_helper
> > [ 2449.939016] RIP: 0010:btrfs_map_bio.cold+0x5a/0x5c
> > [ 2449.939392] Code: 37 87 ff ff e8 ed d4 8a ff 48 83 c4 18 e9 b5 52 8b ff
> > 49 89 c8 4c 89 fa 4c 89 f1 48 c7 c6 b0 c0 61 8b 48 89 ef e8 11 87 ff ff <0f>
> > 0b 4c 89 e7 e8 42 09 86 ff e9 fd 59 8b ff 49 8b 7a 50 44 89 f2
> > [ 2449.940402] RSP: 0000:ffff9f24c1637d90 EFLAGS: 00010282
> > [ 2449.940689] RAX: 0000000000000057 RBX: ffff90c78ff716b8 RCX: 0000000000000000
> > [ 2449.941080] RDX: ffff90c7fbc27ae0 RSI: ffff90c7fbc19110 RDI: ffff90c7fbc19110
> > [ 2449.941467] RBP: ffff90c7911d4000 R08: 0000000000000000 R09: 0000000000000000
> > [ 2449.941853] R10: ffff9f24c1637b48 R11: ffffffff8b9723e8 R12: 0000000000000000
> > [ 2449.942243] R13: 0000000000000000 R14: 000000000000a000 R15: 000000004263a000
> > [ 2449.942632] FS: 0000000000000000(0000) GS:ffff90c7fbc00000(0000)
> > knlGS:0000000000000000
> > [ 2449.943072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 2449.943386] CR2: 00005575163c3080 CR3: 000000010ad6c004 CR4: 0000000000370ef0
> > [ 2449.943772] Call Trace:
> > [ 2449.943915] ? lock_release+0x1c3/0x290
> > [ 2449.944135] run_one_async_done+0x3a/0x60
> > [ 2449.944360] btrfs_work_helper+0x136/0x520
> > [ 2449.944588] process_one_work+0x26e/0x570
> > [ 2449.944812] worker_thread+0x55/0x3c0
> > [ 2449.945016] ? process_one_work+0x570/0x570
> > [ 2449.945250] kthread+0x137/0x150
> > [ 2449.945430] ? __kthread_bind_mask+0x60/0x60
> > [ 2449.945666] ret_from_fork+0x1f/0x30
> >
> > it happens when you run btrfs/060. Please make sure to run xfstests against
> > patches before you submit them upstream. Thanks,
> >
> > Josef
>
> Umm... I ran the xftests against v1 patch and didn't get that panic.

It could have been caused by my fixups to v2 and I can reproduce the
crash now too. I can't see any difference besides the u64/int switch and
the 'goto out' removal in btrfs_bio_fits_in_stripe.

2021-01-29 19:03:10

by Filipe Manana

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Fri, Jan 29, 2021 at 5:54 PM David Sterba <[email protected]> wrote:
>
> On Fri, Jan 29, 2021 at 05:15:21PM +0000, Michal Rostecki wrote:
> > On Fri, Jan 29, 2021 at 11:22:48AM -0500, Josef Bacik wrote:
> > > On 1/27/21 8:57 AM, Michal Rostecki wrote:
> > > > From: Michal Rostecki <[email protected]>
> > > >
> > > > Before this change, the btrfs_get_io_geometry() function was calling
> > > > btrfs_get_chunk_map() to get the extent mapping, necessary for
> > > > calculating the I/O geometry. It was using that extent mapping only
> > > > internally and freeing the pointer after its execution.
> > > >
> > > > That resulted in calling btrfs_get_chunk_map() de facto twice by the
> > > > __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> > > > first and then calling btrfs_get_chunk_map() directly to get the extent
> > > > mapping, used by the rest of the function.
> > > >
> > > > This change fixes that by passing the extent mapping to the
> > > > btrfs_get_io_geometry() function as an argument.
> > > >
> > > > v2:
> > > > When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> > > > - Use errno_to_blk_status(PTR_ERR(em)) as the status
> > > > - Set em to NULL
> > > >
> > > > Signed-off-by: Michal Rostecki <[email protected]>
> > >
> > > This panic'ed all of my test vms in their overnight xfstests runs, the panic is this
> > >
> > > [ 2449.936502] BTRFS critical (device dm-7): mapping failed logical
> > > 1113825280 bio len 40960 len 24576
> > > [ 2449.937073] ------------[ cut here ]------------
> > > [ 2449.937329] kernel BUG at fs/btrfs/volumes.c:6450!
> > > [ 2449.937604] invalid opcode: 0000 [#1] SMP NOPTI
> > > [ 2449.937855] CPU: 0 PID: 259045 Comm: kworker/u5:0 Not tainted 5.11.0-rc5+ #122
> > > [ 2449.938252] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> > > 1.13.0-2.fc32 04/01/2014
> > > [ 2449.938713] Workqueue: btrfs-worker-high btrfs_work_helper
> > > [ 2449.939016] RIP: 0010:btrfs_map_bio.cold+0x5a/0x5c
> > > [ 2449.939392] Code: 37 87 ff ff e8 ed d4 8a ff 48 83 c4 18 e9 b5 52 8b ff
> > > 49 89 c8 4c 89 fa 4c 89 f1 48 c7 c6 b0 c0 61 8b 48 89 ef e8 11 87 ff ff <0f>
> > > 0b 4c 89 e7 e8 42 09 86 ff e9 fd 59 8b ff 49 8b 7a 50 44 89 f2
> > > [ 2449.940402] RSP: 0000:ffff9f24c1637d90 EFLAGS: 00010282
> > > [ 2449.940689] RAX: 0000000000000057 RBX: ffff90c78ff716b8 RCX: 0000000000000000
> > > [ 2449.941080] RDX: ffff90c7fbc27ae0 RSI: ffff90c7fbc19110 RDI: ffff90c7fbc19110
> > > [ 2449.941467] RBP: ffff90c7911d4000 R08: 0000000000000000 R09: 0000000000000000
> > > [ 2449.941853] R10: ffff9f24c1637b48 R11: ffffffff8b9723e8 R12: 0000000000000000
> > > [ 2449.942243] R13: 0000000000000000 R14: 000000000000a000 R15: 000000004263a000
> > > [ 2449.942632] FS: 0000000000000000(0000) GS:ffff90c7fbc00000(0000)
> > > knlGS:0000000000000000
> > > [ 2449.943072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 2449.943386] CR2: 00005575163c3080 CR3: 000000010ad6c004 CR4: 0000000000370ef0
> > > [ 2449.943772] Call Trace:
> > > [ 2449.943915] ? lock_release+0x1c3/0x290
> > > [ 2449.944135] run_one_async_done+0x3a/0x60
> > > [ 2449.944360] btrfs_work_helper+0x136/0x520
> > > [ 2449.944588] process_one_work+0x26e/0x570
> > > [ 2449.944812] worker_thread+0x55/0x3c0
> > > [ 2449.945016] ? process_one_work+0x570/0x570
> > > [ 2449.945250] kthread+0x137/0x150
> > > [ 2449.945430] ? __kthread_bind_mask+0x60/0x60
> > > [ 2449.945666] ret_from_fork+0x1f/0x30
> > >
> > > it happens when you run btrfs/060. Please make sure to run xfstests against
> > > patches before you submit them upstream. Thanks,
> > >
> > > Josef
> >
> > Umm... I ran the xftests against v1 patch and didn't get that panic.
>
> It could have been caused by my fixups to v2 and I can reproduce the
> crash now too. I can't see any difference besides the u64/int switch and
> the 'goto out' removal in btrfs_bio_fits_in_stripe.

The problem is caused by what I mentioned earlier today:

The value of "logical" must be set at the start of each iteration of
the loop at btrfs_submit_direct(),
and not before starting the loop, since the start sector is
incremented at the end of each iteration of the loop.




--
Filipe David Manana,

“Whether you think you can, or you think you can't — you're right.”

2021-01-29 19:06:59

by Michal Rostecki

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Fri, Jan 29, 2021 at 06:47:53PM +0100, David Sterba wrote:
> On Fri, Jan 29, 2021 at 05:15:21PM +0000, Michal Rostecki wrote:
> > On Fri, Jan 29, 2021 at 11:22:48AM -0500, Josef Bacik wrote:
> > > On 1/27/21 8:57 AM, Michal Rostecki wrote:
> > > > From: Michal Rostecki <[email protected]>
> > > >
> > > > Before this change, the btrfs_get_io_geometry() function was calling
> > > > btrfs_get_chunk_map() to get the extent mapping, necessary for
> > > > calculating the I/O geometry. It was using that extent mapping only
> > > > internally and freeing the pointer after its execution.
> > > >
> > > > That resulted in calling btrfs_get_chunk_map() de facto twice by the
> > > > __btrfs_map_block() function. It was calling btrfs_get_io_geometry()
> > > > first and then calling btrfs_get_chunk_map() directly to get the extent
> > > > mapping, used by the rest of the function.
> > > >
> > > > This change fixes that by passing the extent mapping to the
> > > > btrfs_get_io_geometry() function as an argument.
> > > >
> > > > v2:
> > > > When btrfs_get_chunk_map() returns an error in btrfs_submit_direct():
> > > > - Use errno_to_blk_status(PTR_ERR(em)) as the status
> > > > - Set em to NULL
> > > >
> > > > Signed-off-by: Michal Rostecki <[email protected]>
> > >
> > > This panic'ed all of my test vms in their overnight xfstests runs, the panic is this
> > >
> > > [ 2449.936502] BTRFS critical (device dm-7): mapping failed logical
> > > 1113825280 bio len 40960 len 24576
> > > [ 2449.937073] ------------[ cut here ]------------
> > > [ 2449.937329] kernel BUG at fs/btrfs/volumes.c:6450!
> > > [ 2449.937604] invalid opcode: 0000 [#1] SMP NOPTI
> > > [ 2449.937855] CPU: 0 PID: 259045 Comm: kworker/u5:0 Not tainted 5.11.0-rc5+ #122
> > > [ 2449.938252] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> > > 1.13.0-2.fc32 04/01/2014
> > > [ 2449.938713] Workqueue: btrfs-worker-high btrfs_work_helper
> > > [ 2449.939016] RIP: 0010:btrfs_map_bio.cold+0x5a/0x5c
> > > [ 2449.939392] Code: 37 87 ff ff e8 ed d4 8a ff 48 83 c4 18 e9 b5 52 8b ff
> > > 49 89 c8 4c 89 fa 4c 89 f1 48 c7 c6 b0 c0 61 8b 48 89 ef e8 11 87 ff ff <0f>
> > > 0b 4c 89 e7 e8 42 09 86 ff e9 fd 59 8b ff 49 8b 7a 50 44 89 f2
> > > [ 2449.940402] RSP: 0000:ffff9f24c1637d90 EFLAGS: 00010282
> > > [ 2449.940689] RAX: 0000000000000057 RBX: ffff90c78ff716b8 RCX: 0000000000000000
> > > [ 2449.941080] RDX: ffff90c7fbc27ae0 RSI: ffff90c7fbc19110 RDI: ffff90c7fbc19110
> > > [ 2449.941467] RBP: ffff90c7911d4000 R08: 0000000000000000 R09: 0000000000000000
> > > [ 2449.941853] R10: ffff9f24c1637b48 R11: ffffffff8b9723e8 R12: 0000000000000000
> > > [ 2449.942243] R13: 0000000000000000 R14: 000000000000a000 R15: 000000004263a000
> > > [ 2449.942632] FS: 0000000000000000(0000) GS:ffff90c7fbc00000(0000)
> > > knlGS:0000000000000000
> > > [ 2449.943072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 2449.943386] CR2: 00005575163c3080 CR3: 000000010ad6c004 CR4: 0000000000370ef0
> > > [ 2449.943772] Call Trace:
> > > [ 2449.943915] ? lock_release+0x1c3/0x290
> > > [ 2449.944135] run_one_async_done+0x3a/0x60
> > > [ 2449.944360] btrfs_work_helper+0x136/0x520
> > > [ 2449.944588] process_one_work+0x26e/0x570
> > > [ 2449.944812] worker_thread+0x55/0x3c0
> > > [ 2449.945016] ? process_one_work+0x570/0x570
> > > [ 2449.945250] kthread+0x137/0x150
> > > [ 2449.945430] ? __kthread_bind_mask+0x60/0x60
> > > [ 2449.945666] ret_from_fork+0x1f/0x30
> > >
> > > it happens when you run btrfs/060. Please make sure to run xfstests against
> > > patches before you submit them upstream. Thanks,
> > >
> > > Josef
> >
> > Umm... I ran the xftests against v1 patch and didn't get that panic.
>
> It could have been caused by my fixups to v2 and I can reproduce the
> crash now too. I can't see any difference besides the u64/int switch and
> the 'goto out' removal in btrfs_bio_fits_in_stripe.

I was able to fix the issue by the following diff. I will send it as the
patch after confirming that all fstests are passing.

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1a160db01878..04cd95899ac8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7975,21 +7975,21 @@ static blk_qc_t btrfs_submit_direct(struct inode
*inode, struct iomap *iomap,
}

start_sector = dio_bio->bi_iter.bi_sector;
- logical = start_sector << 9;
submit_len = dio_bio->bi_iter.bi_size;

do {
+ logical = start_sector << 9;
em = btrfs_get_chunk_map(fs_info, logical, submit_len);
if (IS_ERR(em)) {
status = errno_to_blk_status(PTR_ERR(em));
em = NULL;
- goto out_err;
+ goto out_err_em;
}
ret = btrfs_get_io_geometry(fs_info, em,
btrfs_op(dio_bio),
logical, submit_len, &geom);
if (ret) {
status = errno_to_blk_status(ret);
- goto out_err;
+ goto out_err_em;
}
ASSERT(geom.len <= INT_MAX);

@@ -8034,7 +8034,7 @@ static blk_qc_t btrfs_submit_direct(struct inode
*inode, struct iomap *iomap,
bio_put(bio);
if (submit_len > 0)
refcount_dec(&dip->refs);
- goto out_err;
+ goto out_err_em;
}

dio_data->submitted += clone_len;
@@ -8046,10 +8046,11 @@ static blk_qc_t btrfs_submit_direct(struct inode
*inode, struct iomap *iomap,
} while (submit_len > 0);
return BLK_QC_T_NONE;

+out_err_em:
+ free_extent_map(em);
out_err:
dip->dio_bio->bi_status = status;
btrfs_dio_private_put(dip);
- free_extent_map(em);

return BLK_QC_T_NONE;
}

2021-01-29 19:46:51

by David Sterba

[permalink] [raw]
Subject: Re: [PATCH v2] btrfs: Avoid calling btrfs_get_chunk_map() twice

On Fri, Jan 29, 2021 at 07:02:41PM +0000, Michal Rostecki wrote:
> On Fri, Jan 29, 2021 at 06:47:53PM +0100, David Sterba wrote:
> > On Fri, Jan 29, 2021 at 05:15:21PM +0000, Michal Rostecki wrote:
> > > On Fri, Jan 29, 2021 at 11:22:48AM -0500, Josef Bacik wrote:
> > > > On 1/27/21 8:57 AM, Michal Rostecki wrote:
> > > > it happens when you run btrfs/060. Please make sure to run xfstests against
> > > > patches before you submit them upstream. Thanks,
> > > >
> > > > Josef
> > >
> > > Umm... I ran the xftests against v1 patch and didn't get that panic.
> >
> > It could have been caused by my fixups to v2 and I can reproduce the
> > crash now too. I can't see any difference besides the u64/int switch and
> > the 'goto out' removal in btrfs_bio_fits_in_stripe.
>
> I was able to fix the issue by the following diff. I will send it as the
> patch after confirming that all fstests are passing.

Thanks, can't reproduce the crash with that at least on test btrfs/060.