For when forcealign is enabled, blocks in an inode need to be unmapped
according to extent alignment, like what is already done for rtvol.
Signed-off-by: John Garry <[email protected]>
---
fs/xfs/libxfs/xfs_bmap.c | 39 +++++++++++++++++++++++++++++++++------
fs/xfs/xfs_inode.h | 5 +++++
2 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4f39a43d78a7..4a78ab193753 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5339,6 +5339,15 @@ xfs_bmap_del_extent_real(
return 0;
}
+/* Return the offset of an block number within an extent for forcealign. */
+static xfs_extlen_t
+xfs_forcealign_extent_offset(
+ struct xfs_inode *ip,
+ xfs_fsblock_t bno)
+{
+ return bno & (ip->i_extsize - 1);
+}
+
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
@@ -5361,6 +5370,7 @@ __xfs_bunmapi(
struct xfs_bmbt_irec got; /* current extent record */
struct xfs_ifork *ifp; /* inode fork pointer */
int isrt; /* freeing in rt area */
+ int isforcealign; /* freeing for file inode with forcealign */
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
struct xfs_mount *mp = ip->i_mount;
@@ -5397,7 +5407,10 @@ __xfs_bunmapi(
return 0;
}
XFS_STATS_INC(mp, xs_blk_unmap);
- isrt = xfs_ifork_is_realtime(ip, whichfork);
+ isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ isforcealign = (whichfork == XFS_DATA_FORK) &&
+ xfs_inode_has_forcealign(ip) &&
+ xfs_inode_has_extsize(ip) && ip->i_extsize > 1;
end = start + len;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
@@ -5459,11 +5472,15 @@ __xfs_bunmapi(
if (del.br_startoff + del.br_blockcount > end + 1)
del.br_blockcount = end + 1 - del.br_startoff;
- if (!isrt || (flags & XFS_BMAPI_REMAP))
+ if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP))
goto delete;
- mod = xfs_rtb_to_rtxoff(mp,
- del.br_startblock + del.br_blockcount);
+ if (isrt)
+ mod = xfs_rtb_to_rtxoff(mp,
+ del.br_startblock + del.br_blockcount);
+ else if (isforcealign)
+ mod = xfs_forcealign_extent_offset(ip,
+ del.br_startblock + del.br_blockcount);
if (mod) {
/*
* Realtime extent not lined up at the end.
@@ -5511,9 +5528,19 @@ __xfs_bunmapi(
goto nodelete;
}
- mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
+ if (isrt)
+ mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
+ else if (isforcealign)
+ mod = xfs_forcealign_extent_offset(ip,
+ del.br_startblock);
+
if (mod) {
- xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
+ xfs_extlen_t off;
+
+ if (isrt)
+ off = mp->m_sb.sb_rextsize - mod;
+ else if (isforcealign)
+ off = ip->i_extsize - mod;
/*
* Realtime extent is lined up at the end but not
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 065028789473..3f13943ab3a3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -316,6 +316,11 @@ static inline bool xfs_inode_has_forcealign(struct xfs_inode *ip)
return ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN;
}
+static inline bool xfs_inode_has_extsize(struct xfs_inode *ip)
+{
+ return ip->i_diflags & XFS_DIFLAG_EXTSIZE;
+}
+
/*
* Return the buftarg used for data allocations on a given inode.
*/
--
2.31.1
On 01/05/2024 01:10, Dave Chinner wrote:
> On Mon, Apr 29, 2024 at 05:47:36PM +0000, John Garry wrote:
>> For when forcealign is enabled, blocks in an inode need to be unmapped
>> according to extent alignment, like what is already done for rtvol.
>>
>> Signed-off-by: John Garry <[email protected]>
>> ---
>> fs/xfs/libxfs/xfs_bmap.c | 39 +++++++++++++++++++++++++++++++++------
>> fs/xfs/xfs_inode.h | 5 +++++
>> 2 files changed, 38 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
>> index 4f39a43d78a7..4a78ab193753 100644
>> --- a/fs/xfs/libxfs/xfs_bmap.c
>> +++ b/fs/xfs/libxfs/xfs_bmap.c
>> @@ -5339,6 +5339,15 @@ xfs_bmap_del_extent_real(
>> return 0;
>> }
>>
>> +/* Return the offset of an block number within an extent for forcealign. */
>> +static xfs_extlen_t
>> +xfs_forcealign_extent_offset(
>> + struct xfs_inode *ip,
>> + xfs_fsblock_t bno)
>> +{
>> + return bno & (ip->i_extsize - 1);
>> +}
>> +
>> /*
>> * Unmap (remove) blocks from a file.
>> * If nexts is nonzero then the number of extents to remove is limited to
>> @@ -5361,6 +5370,7 @@ __xfs_bunmapi(
>> struct xfs_bmbt_irec got; /* current extent record */
>> struct xfs_ifork *ifp; /* inode fork pointer */
>> int isrt; /* freeing in rt area */
>> + int isforcealign; /* freeing for file inode with forcealign */
>> int logflags; /* transaction logging flags */
>> xfs_extlen_t mod; /* rt extent offset */
>> struct xfs_mount *mp = ip->i_mount;
>> @@ -5397,7 +5407,10 @@ __xfs_bunmapi(
>> return 0;
>> }
>> XFS_STATS_INC(mp, xs_blk_unmap);
>> - isrt = xfs_ifork_is_realtime(ip, whichfork);
>> + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
>
> Why did you change this check? What's wrong with
> xfs_ifork_is_realtime(), and if there is something wrong, why
> shouldn't xfs_ifork_is_relatime() get fixed?
oops, I should have not made that change. I must have changed it when
debugging and not reverted it.
>
>> + isforcealign = (whichfork == XFS_DATA_FORK) &&
>> + xfs_inode_has_forcealign(ip) &&
>> + xfs_inode_has_extsize(ip) && ip->i_extsize > 1;
>
> This is one of the reasons why I said xfs_inode_has_forcealign()
> should be checking that extent size hints should be checked in that
> helper....
Right. In this particular case, I found that directories may be
considered as well if we don't check for xfs_inode_has_extsize() (which
we don't want).
>
>> end = start + len;
>>
>> if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
>> @@ -5459,11 +5472,15 @@ __xfs_bunmapi(
>> if (del.br_startoff + del.br_blockcount > end + 1)
>> del.br_blockcount = end + 1 - del.br_startoff;
>>
>> - if (!isrt || (flags & XFS_BMAPI_REMAP))
>> + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP))
>> goto delete;
>>
>> - mod = xfs_rtb_to_rtxoff(mp,
>> - del.br_startblock + del.br_blockcount);
>> + if (isrt)
>> + mod = xfs_rtb_to_rtxoff(mp,
>> + del.br_startblock + del.br_blockcount);
>> + else if (isforcealign)
>> + mod = xfs_forcealign_extent_offset(ip,
>> + del.br_startblock + del.br_blockcount);
>
> There's got to be a cleaner way to do this.
>
> We already know that either isrt or isforcealign must be set here,
> so there's no need for the "else if" construct.
right
>
> Also, forcealign should take precedence over realtime, so that
> forcealign will work on realtime devices as well. I'd change this
> code to call a wrapper like:
>
> mod = xfs_bunmapi_align(ip, del.br_startblock + del.br_blockcount);
>
> static xfs_extlen_t
> xfs_bunmapi_align(
> struct xfs_inode *ip,
> xfs_fsblock_t bno)
> {
> if (!XFS_INODE_IS_REALTIME(ip)) {
> ASSERT(xfs_inode_has_forcealign(ip))
> if (is_power_of_2(ip->i_extsize))
> return bno & (ip->i_extsize - 1);
> return do_div(bno, ip->i_extsize);
> }
> return xfs_rtb_to_rtxoff(ip->i_mount, bno);
> }
ok, that's neater
Thanks,
John
Hi Dave,
>>
>> if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
>> @@ -5459,11 +5472,15 @@ __xfs_bunmapi(
>> if (del.br_startoff + del.br_blockcount > end + 1)
>> del.br_blockcount = end + 1 - del.br_startoff;
>>
>> - if (!isrt || (flags & XFS_BMAPI_REMAP))
>> + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP))
>> goto delete;
>>
>> - mod = xfs_rtb_to_rtxoff(mp,
>> - del.br_startblock + del.br_blockcount);
>> + if (isrt)
>> + mod = xfs_rtb_to_rtxoff(mp,
>> + del.br_startblock + del.br_blockcount);
>> + else if (isforcealign)
>> + mod = xfs_forcealign_extent_offset(ip,
>> + del.br_startblock + del.br_blockcount);
> There's got to be a cleaner way to do this.
>
> We already know that either isrt or isforcealign must be set here,
> so there's no need for the "else if" construct.
>
> Also, forcealign should take precedence over realtime, so that
> forcealign will work on realtime devices as well. I'd change this
> code to call a wrapper like:
>
> mod = xfs_bunmapi_align(ip, del.br_startblock + del.br_blockcount);
>
> static xfs_extlen_t
> xfs_bunmapi_align(
> struct xfs_inode *ip,
> xfs_fsblock_t bno)
> {
> if (!XFS_INODE_IS_REALTIME(ip)) {
> ASSERT(xfs_inode_has_forcealign(ip))
> if (is_power_of_2(ip->i_extsize))
> return bno & (ip->i_extsize - 1);
> return do_div(bno, ip->i_extsize);
> }
> return xfs_rtb_to_rtxoff(ip->i_mount, bno);
> }
I have made that change according to your suggestion.
However, now that the forcealign power-of-2 extsize restriction has been
lifted, I am finding another bug.
I will just mention this now, as I want to go back to that other issue
https://lore.kernel.org/linux-xfs/[email protected]/T/#mebd7e97dfd0f12219bf92f289c41f62bf2abcff5
However this new one is pretty simple to reproduce.
We create a file:
ext: logical_offset: physical_offset: length: expected: flags:
0: 0.. 1775: 40200.. 41975: 1776: last,eof
/root/mnt2/file_22: 1 extent found
The forcealign extsize is 98304, i.e. 24 blks.
And then try to delete it, and get this:
[ 17.604237] XFS: Assertion failed: tp->t_blk_res > 0, file:
fs/xfs/libxfs/xfs_bmap.c, line: 5599
[ 17.605908] ------------[ cut here ]------------
[ 17.606884] kernel BUG at fs/xfs/xfs_message.c:102!
[ 17.607917] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[ 17.609134] CPU: 3 PID: 240 Comm: kworker/3:2 Not tainted
6.10.0-rc1-00096-g759a4497daa7-dirty #2553
[ 17.610606] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
[ 17.612619] Workqueue: xfs-inodegc/sda xfs_inodegc_worker
[ 17.613682] RIP: 0010:assfail+0x36/0x40
[ 17.614134] Code: c2 18 1e 1d 9d 48 89 f1 48 89 fe 48 c7 c7 43 f2 12
9d e8 7d fd ff ff 80 3d ae 86 8d 01 00 75 09 90 0f 0b 90 c3 cc cc cc cc
90 <0f> 0b 0f 1f 84 00 00 00 00 00 90 90 90 90 900
[ 17.616478] RSP: 0018:ff4887cac0973c28 EFLAGS: 00010202
[ 17.617080] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
000000007fffffff
[ 17.617899] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
ffffffff9d12f243
[ 17.618717] RBP: ff4887cac0973d40 R08: 0000000000000000 R09:
000000000000000a
[ 17.619548] R10: 000000000000000a R11: f000000000000000 R12:
ff360ff881d88000[ 17.620360] R13: 0000000000000000 R14:
ff360ff8efa32040 R15: 000ffffffffe0000
[ 17.620367] FS: 0000000000000000(0000) GS:ff360ffa75cc0000(0000)
knlGS:0000000000000000
[ 17.620369] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 17.620371] CR2: 00007f213b4ef008 CR3: 000000011cfca003 CR4:
0000000000771ef0
[ 17.620372] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[ 17.620374] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[ 17.620375] PKRU: 55555554
[ 17.620376] Call Trace:
[ 17.620384] <TASK>
[ 17.624278] ? die+0x32/0x90
[ 17.624284] ? do_trap+0xd8/0x100
[ 17.624286] ? assfail+0x36/0x40
[ 17.624288] ? do_error_trap+0x60/0x80
[ 17.624289] ? assfail+0x36/0x40
[ 17.624292] ? exc_invalid_op+0x53/0x70
[ 17.628287] ? assfail+0x36/0x40
[ 17.628291] ? asm_exc_invalid_op+0x1a/0x20
[ 17.628295] ? assfail+0x36/0x40
[ 17.628297] ? assfail+0x23/0x40
[ 17.628299] __xfs_bunmapi+0xb87/0xeb0
[ 17.628304] ? xfs_log_reserve+0x18f/0x210
[ 17.629447] xfs_bunmapi_range+0x62/0xd0
[ 17.631699] xfs_itruncate_extents_flags+0x1c4/0x410
[ 17.631703] xfs_inactive_truncate+0xba/0x140
[ 17.631705] xfs_inactive+0x331/0x3d0
[ 17.631707] xfs_inodegc_worker+0xb8/0x190
[ 17.631709] process_one_work+0x157/0x380
[ 17.633949] worker_thread+0x2ba/0x3e0
[ 17.633953] ? __pfx_worker_thread+0x10/0x10
[ 17.633954] kthread+0xce/0x100
[ 17.633958] ? __pfx_kthread+0x10/0x10
[ 17.633960] ret_from_fork+0x2c/0x50
[ 17.633963] ? __pfx_kthread+0x10/0x10
[ 17.633965] ret_from_fork_asm+0x1a/0x30
[ 17.636314] </TASK>
[ 17.640377] Modules linked in:
[ 17.642375] ---[ end trace 0000000000000000 ]---
Maybe something is going wrong with the AG bno vs fsbno indexing.
That extent allocated has fsbno=50552 (% 24 != 0). The agsize is 22416 fsb.
That 50552 comes from xfs_alloc_vextent_finish() with args->fsbno=50552
= XFS_AGB_TO_FSB(mp, agno=1, agbno=17784) = 32K
(=roundup_power_of_2(22416)) + 17784
So the agbno is aligned, but the fsbno is not.
In __xfs_bunmapi(), at this point:
mod = xfs_bunmapi_align(ip, del.br_startblock);
if (mod) {
xfs_extlen_t off;
if (isforcealign) {
off = ip->i_extsize - mod;
} else {
ASSERT(isrt);
off = mp->m_sb.sb_rextsize - mod;
}
/*
* Realtime extent is lined up at the end but not
* at the front. We'll get rid of full extents if
* we can.
*/
mod=8 del.br_startblock(48776) + del.br_blockcount(1776)=50552
Since this code was originally only used for rt, we may be missing
setting some struct members which were being set for rt. For example,
xfs_trans_alloc() accepts rtextents value, and maybe we should be doing
similar for forcealign. Or xfs_fsb_to_db() has special RT handling, but
I doubt this is the problem.
I have no such issue in using a power-of-2 extsize.
I do realise that I need to share the full code, but I'm reluctant to
post with known bugs.
Please let me know if you have an idea. I'll look at this further when I
get a chance.
Thanks,
John