From: Dave Chinner <[email protected]>
If args->minlen is not aligned to the constraints of forced
alignment, we may do minlen allocations that are not aligned when we
approach ENOSPC. Avoid this by always aligning args->minlen
appropriately. If alignment of minlen results in a value smaller
than the alignment constraint, fail the allocation immediately.
Signed-off-by: Dave Chinner <[email protected]>
Signed-off-by: John Garry <[email protected]>
---
fs/xfs/libxfs/xfs_bmap.c | 45 +++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7a0ef0900097..4f39a43d78a7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3288,33 +3288,48 @@ xfs_bmap_longest_free_extent(
return 0;
}
-static xfs_extlen_t
+static int
xfs_bmap_select_minlen(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t blen)
{
-
/* Adjust best length for extent start alignment. */
if (blen > args->alignment)
blen -= args->alignment;
/*
* Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
- * possible that there is enough contiguous free space for this request.
+ * possible that there is enough contiguous free space for this request
+ * even if best length is less that the minimum length we need.
+ *
+ * If the best length won't satisfy the maximum length we requested,
+ * then use it as the minimum length so we get as large an allocation
+ * as possible.
*/
if (blen < ap->minlen)
- return ap->minlen;
+ blen = ap->minlen;
+ else if (blen > args->maxlen)
+ blen = args->maxlen;
/*
- * If the best seen length is less than the request length,
- * use the best as the minimum, otherwise we've got the maxlen we
- * were asked for.
+ * If we have alignment constraints, round the minlen down to match the
+ * constraint so that alignment will be attempted. This may reduce the
+ * allocation to smaller than was requested, so clamp the minimum to
+ * ap->minlen to allow unaligned allocation to succeed. If we are forced
+ * to align the allocation, return ENOSPC at this point because we don't
+ * have enough contiguous free space to guarantee aligned allocation.
*/
- if (blen < args->maxlen)
- return blen;
- return args->maxlen;
-
+ if (args->alignment > 1) {
+ blen = rounddown(blen, args->alignment);
+ if (blen < ap->minlen) {
+ if (args->datatype & XFS_ALLOC_FORCEALIGN)
+ return -ENOSPC;
+ blen = ap->minlen;
+ }
+ }
+ args->minlen = blen;
+ return 0;
}
static int
@@ -3350,8 +3365,7 @@ xfs_bmap_btalloc_select_lengths(
if (pag)
xfs_perag_rele(pag);
- args->minlen = xfs_bmap_select_minlen(ap, args, blen);
- return error;
+ return xfs_bmap_select_minlen(ap, args, blen);
}
/* Update all inode and quota accounting for the allocation we just did. */
@@ -3671,7 +3685,10 @@ xfs_bmap_btalloc_filestreams(
goto out_low_space;
}
- args->minlen = xfs_bmap_select_minlen(ap, args, blen);
+ error = xfs_bmap_select_minlen(ap, args, blen);
+ if (error)
+ goto out_low_space;
+
if (ap->aeof && ap->offset)
error = xfs_bmap_btalloc_at_eof(ap, args);
--
2.31.1
On 29/04/2024 18:47, John Garry wrote:
> From: Dave Chinner <[email protected]>
>
> If args->minlen is not aligned to the constraints of forced
> alignment, we may do minlen allocations that are not aligned when we
> approach ENOSPC. Avoid this by always aligning args->minlen
> appropriately. If alignment of minlen results in a value smaller
> than the alignment constraint, fail the allocation immediately.
>
> Signed-off-by: Dave Chinner <[email protected]>
> Signed-off-by: John Garry <[email protected]>
> ---
> fs/xfs/libxfs/xfs_bmap.c | 45 +++++++++++++++++++++++++++-------------
> 1 file changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 7a0ef0900097..4f39a43d78a7 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3288,33 +3288,48 @@ xfs_bmap_longest_free_extent(
> return 0;
> }
>
> -static xfs_extlen_t
> +static int
> xfs_bmap_select_minlen(
> struct xfs_bmalloca *ap,
> struct xfs_alloc_arg *args,
> xfs_extlen_t blen)
> {
> -
> /* Adjust best length for extent start alignment. */
> if (blen > args->alignment)
> blen -= args->alignment;
>
> /*
> * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
> - * possible that there is enough contiguous free space for this request.
> + * possible that there is enough contiguous free space for this request
> + * even if best length is less that the minimum length we need.
> + *
> + * If the best length won't satisfy the maximum length we requested,
> + * then use it as the minimum length so we get as large an allocation
> + * as possible.
> */
> if (blen < ap->minlen)
> - return ap->minlen;
> + blen = ap->minlen;
> + else if (blen > args->maxlen)
> + blen = args->maxlen;
>
> /*
> - * If the best seen length is less than the request length,
> - * use the best as the minimum, otherwise we've got the maxlen we
> - * were asked for.
> + * If we have alignment constraints, round the minlen down to match the
> + * constraint so that alignment will be attempted. This may reduce the
> + * allocation to smaller than was requested, so clamp the minimum to
> + * ap->minlen to allow unaligned allocation to succeed. If we are forced
> + * to align the allocation, return ENOSPC at this point because we don't
> + * have enough contiguous free space to guarantee aligned allocation.
> */
> - if (blen < args->maxlen)
> - return blen;
> - return args->maxlen;
> -
> + if (args->alignment > 1) {
> + blen = rounddown(blen, args->alignment);
> + if (blen < ap->minlen) {
> + if (args->datatype & XFS_ALLOC_FORCEALIGN)
> + return -ENOSPC;
> + blen = ap->minlen;
> + }
> + }
Hi Dave,
I still think that there is a problem with this code or some other
allocator code which gives rise to unexpected -ENOSPC. I just highlight
this code, above, as I get an unexpected -ENOSPC failure here when the
fs does have many free (big enough) extents. I think that the problem
may be elsewhere, though.
Initially we have a file like this:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..127]: 62592..62719 0 (62592..62719) 128
1: [128..895]: hole 768
2: [896..1023]: 63616..63743 0 (63616..63743) 128
3: [1024..1151]: 64896..65023 0 (64896..65023) 128
4: [1152..1279]: 65664..65791 0 (65664..65791) 128
5: [1280..1407]: 68224..68351 0 (68224..68351) 128
6: [1408..1535]: 76416..76543 0 (76416..76543) 128
7: [1536..1791]: 62720..62975 0 (62720..62975) 256
8: [1792..1919]: 60032..60159 0 (60032..60159) 128
9: [1920..2047]: 63488..63615 0 (63488..63615) 128
10: [2048..2303]: 63744..63999 0 (63744..63999) 256
forcealign extsize is 16 4k fsb, so the layout looks ok.
Then we truncate the file to 454 sectors (or 56.75 fsb). This gives:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..127]: 62592..62719 0 (62592..62719) 128
1: [128..455]: hole 328
We have 57 fsb.
Then I attempt to write from byte offset 232448 (454 sector) and a get a
write failure in xfs_bmap_select_minlen() returning -ENOSPC; at that
point the file looks like this:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
0: [0..127]: 62592..62719 0 (62592..62719) 128
1: [128..447]: hole 320
2: [448..575]: 62720..62847 0 (62720..62847) 128
That hole in ext #1 is 40 fsb, and not aligned with forcealign
granularity. This means that ext #2 is misaligned wrt forcealign
granularity.
This is strange.
I notice that we when allocate ext #2, xfs_bmap_btalloc() returns
ap->blkno=7840, length=16, offset=56. I would expect offset % 16 == 0,
which it is not.
In the following sub-io block zeroing, I note that we zero the front
padding from pos=196608 (or fsb 48 or sector 384) for len=35840, and
back padding from pos=263680 for len=64000 (upto sector 640 or fsb 80).
That seems wrong, as we are zeroing data in the ext #1 hole, right?
Now the actual -ENOSPC comes from xfs_bmap_btalloc() -> ... ->
xfs_bmap_select_minlen() with initially blen=32 args->alignment=16
ap->minlen=1 args->maxlen=8. There xfs_bmap_btalloc() has ap->length=8
initially. This may be just a symptom.
With args->maxlen < args->alignment, we fail with -ENOSPC in
xfs_bmap_select_minlen()
I guess that there is something wrong in the block allocator for ext #2.
Any idea where to check?
I'll send a new v4 series soon which has this problem, as to share the
exact full code changes.
Thanks,
John
> + args->minlen = blen;
> + return 0;
> }
>
> static int
> @@ -3350,8 +3365,7 @@ xfs_bmap_btalloc_select_lengths(
> if (pag)
> xfs_perag_rele(pag);
>
> - args->minlen = xfs_bmap_select_minlen(ap, args, blen);
> - return error;
> + return xfs_bmap_select_minlen(ap, args, blen);
> }
>
> /* Update all inode and quota accounting for the allocation we just did. */
> @@ -3671,7 +3685,10 @@ xfs_bmap_btalloc_filestreams(
> goto out_low_space;
> }
>
> - args->minlen = xfs_bmap_select_minlen(ap, args, blen);
> + error = xfs_bmap_select_minlen(ap, args, blen);
> + if (error)
> + goto out_low_space;
> +
> if (ap->aeof && ap->offset)
> error = xfs_bmap_btalloc_at_eof(ap, args);
>
On 06/06/2024 09:47, Dave Chinner wrote:
> On Wed, Jun 05, 2024 at 03:26:11PM +0100, John Garry wrote:
>> Hi Dave,
>>
>> I still think that there is a problem with this code or some other allocator
>> code which gives rise to unexpected -ENOSPC. I just highlight this code,
>> above, as I get an unexpected -ENOSPC failure here when the fs does have
>> many free (big enough) extents. I think that the problem may be elsewhere,
>> though.
>>
>> Initially we have a file like this:
>>
>> EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
>> 0: [0..127]: 62592..62719 0 (62592..62719) 128
>> 1: [128..895]: hole 768
>> 2: [896..1023]: 63616..63743 0 (63616..63743) 128
>> 3: [1024..1151]: 64896..65023 0 (64896..65023) 128
>> 4: [1152..1279]: 65664..65791 0 (65664..65791) 128
>> 5: [1280..1407]: 68224..68351 0 (68224..68351) 128
>> 6: [1408..1535]: 76416..76543 0 (76416..76543) 128
>> 7: [1536..1791]: 62720..62975 0 (62720..62975) 256
>> 8: [1792..1919]: 60032..60159 0 (60032..60159) 128
>> 9: [1920..2047]: 63488..63615 0 (63488..63615) 128
>> 10: [2048..2303]: 63744..63999 0 (63744..63999) 256
>>
>> forcealign extsize is 16 4k fsb, so the layout looks ok.
>>
>> Then we truncate the file to 454 sectors (or 56.75 fsb). This gives:
>>
>> EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
>> 0: [0..127]: 62592..62719 0 (62592..62719) 128
>> 1: [128..455]: hole 328
>>
>> We have 57 fsb.
>>
>> Then I attempt to write from byte offset 232448 (454 sector) and a get a
>> write failure in xfs_bmap_select_minlen() returning -ENOSPC; at that point
>> the file looks like this:
>
> So you are doing an unaligned write of some size at EOF and EOF is
> not aligned to the extsize?
Correct
>
>> EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL
>> 0: [0..127]: 62592..62719 0 (62592..62719) 128
>> 1: [128..447]: hole 320
>> 2: [448..575]: 62720..62847 0 (62720..62847) 128
>>
>> That hole in ext #1 is 40 fsb, and not aligned with forcealign granularity.
>> This means that ext #2 is misaligned wrt forcealign granularity.
>
> OK, so the command to produce this would be something like this?
>
> # xfs_io -fd -c "truncate 0" \
> -c "chattr +<forcealign>" -c "extsize 64k" \
> -c "pwrite 0 64k -b 64k" -c "pwrite 448k 64k -b 64k" \
> -c "bmap -vvp" \
> -c "truncate 227k" \
> -c "bmap -vvp" \
> -c "pwrite 227k 64k -b 64k" \
> -c "bmap -vvp" \
> /mnt/scratch/testfile
No, unfortunately not. Well maybe not on a clean filesystem. In my
stress test, something else is causing this. Probably heavy fragmentation.
>
>> This is strange.
>>
>> I notice that we when allocate ext #2, xfs_bmap_btalloc() returns
>> ap->blkno=7840, length=16, offset=56. I would expect offset % 16 == 0, which
>> it is not.
>
> IOWs, the allocation was not correctly rounded down to an aligned
> start offset. What were the initial parameters passed to this
> allocation?
For xfs_bmap_btalloc() entry,
ap->offset=48, length=32, blkno=0, total=0, minlen=1, minleft=1, eof=1,
wasdel=0, aeof=0, conv=0, datatype=5, flags=0x8
> i.e. why didn't it round the start offset down to 48?
> Answering that question will tell you where the bug is.
After xfs_bmap_compute_alignments() -> xfs_bmap_extsize_align(),
ap->offset=48 - that seems ok.
Maybe the problem is in xfs_bmap_process_allocated_extent(). For the
problematic case when calling that function:
args->fsbno=7840 args->len=16 ap->offset=48 orig_offset=56 orig_length=24
So, as the comment reads there, we could not satisfy the original length
request, so we move up the position of the extent.
I assume that we just don't want to do that for forcealign, correct?
>
> Of course, if the allocation start is rounded down to 48, then
> the length should be rounded up to 32 to cover the entire range we
> are writing new data to.
>
>> In the following sub-io block zeroing, I note that we zero the front padding
>> from pos=196608 (or fsb 48 or sector 384) for len=35840, and back padding
>> from pos=263680 for len=64000 (upto sector 640 or fsb 80). That seems wrong,
>> as we are zeroing data in the ext #1 hole, right?
>
> The sub block zeroing is doing exactly the right thing - it is
> demonstrating the exact range that the force aligned allocation
> should have covered.
Agreed
>
>> Now the actual -ENOSPC comes from xfs_bmap_btalloc() -> ... ->
>> xfs_bmap_select_minlen() with initially blen=32 args->alignment=16
>> ap->minlen=1 args->maxlen=8. There xfs_bmap_btalloc() has ap->length=8
>> initially. This may be just a symptom.
>
> Yeah, now the allocator is trying to fix up the mess that the first unaligned
> allocation created, and it's tripping over ENOSPC because it's not
> allowed to do sub-extent size hint allocations when forced alignment
> is enabled....
>
>> I guess that there is something wrong in the block allocator for ext #2. Any
>> idea where to check?
>
> Start with tracing exactly what range iomap is requesting be
> allocated, and then follow that through into the allocator to work
> out why the offset being passed to the allocation never gets rounded
> down to be aligned. There's a mistake in the logic somewhere that is
> failing to apply the start alignment to the allocation request (i.e.
> the bug will be in the allocation setup code path. i.e. somewhere
> in the xfs_bmapi_write -> xfs_bmap_btalloc path *before* we get to
> the xfs_alloc_vextent...() calls.
>
As above, the problem seems in the processing fix-up.
Thanks,
John
On 06/06/2024 17:22, John Garry wrote:
>
>> i.e. why didn't it round the start offset down to 48?
>> Answering that question will tell you where the bug is.
>
> After xfs_bmap_compute_alignments() -> xfs_bmap_extsize_align(),
> ap->offset=48 - that seems ok.
>
> Maybe the problem is in xfs_bmap_process_allocated_extent(). For the
> problematic case when calling that function:
>
> args->fsbno=7840 args->len=16 ap->offset=48 orig_offset=56 orig_length=24
>
> So, as the comment reads there, we could not satisfy the original length
> request, so we move up the position of the extent.
>
> I assume that we just don't want to do that for forcealign, correct?
>
JFYI, after making this following change, my stress test ran overnight:
@@ -3506,13 +3513,15 @@ xfs_bmap_process_allocated_extent(
* very fragmented so we're unlikely to be able to satisfy the
* hints anyway.
*/
+ if (!xfs_inode_has_forcealign(ap->ip)) {
if (ap->length <= orig_length)
ap->offset = orig_offset;
else if (ap->offset + ap->length < orig_offset + orig_length)
ap->offset = orig_offset + orig_length - ap->length;
-
+ }
+
>>
>> Of course, if the allocation start is rounded down to 48, then
>> the length should be rounded up to 32 to cover the entire range we
>> are writing new data to.