2024-02-09 11:23:38

by Jan Kara

[permalink] [raw]
Subject: [PATCH 0/3] ext4: Create EA inodes outside of buffer lock

Hello,

ext4_xattr_set_entry() creates new EA inodes while holding buffer lock on the
external xattr block. This is problematic as it nests all the allocation
locking (which acquires locks on other buffers) under the buffer lock. This can
even deadlock when the filesystem is corrupted and e.g. quota file is setup to
contain xattr block as data block as syzbot has spotted. This series moves
the allocation of EA inode to happen outside of the buffer lock which is
generally more sensible and also fixes the syzbot reproducer.

Honza


2024-02-09 11:23:47

by Jan Kara

[permalink] [raw]
Subject: [PATCH 2/3] ext4: Do not create EA inode under buffer lock

ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
on the external xattr block. This is problematic as it nests all the
allocation locking (which acquires locks on other buffers) under the
buffer lock. This can even deadlock when the filesystem is corrupted and
e.g. quota file is setup to contain xattr block as data block. Move the
allocation of EA inode out of ext4_xattr_set_entry() into the callers.

Reported-by: [email protected]
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/xattr.c | 100 +++++++++++++++++++++++-------------------------
1 file changed, 48 insertions(+), 52 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 146690c10c73..e7e1ffff8eba 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1619,6 +1619,7 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
struct ext4_xattr_search *s,
handle_t *handle, struct inode *inode,
+ struct inode *new_ea_inode,
bool is_block)
{
struct ext4_xattr_entry *last, *next;
@@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
- struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;

@@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
old_ea_inode = NULL;
goto out;
}
- }
- if (i->value && in_inode) {
- WARN_ON_ONCE(!i->value_len);
-
- new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
- i->value, i->value_len);
- if (IS_ERR(new_ea_inode)) {
- ret = PTR_ERR(new_ea_inode);
- new_ea_inode = NULL;
- goto out;
- }
- }

- if (old_ea_inode) {
/* We are ready to release ref count on the old_ea_inode. */
ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
- if (ret) {
- /* Release newly required ref count on new_ea_inode. */
- if (new_ea_inode) {
- int err;
-
- err = ext4_xattr_inode_dec_ref(handle,
- new_ea_inode);
- if (err)
- ext4_warning_inode(new_ea_inode,
- "dec ref new_ea_inode err=%d",
- err);
- ext4_xattr_inode_free_quota(inode, new_ea_inode,
- i->value_len);
- }
+ if (ret)
goto out;
- }

ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
@@ -1866,7 +1839,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
ret = 0;
out:
iput(old_ea_inode);
- iput(new_ea_inode);
return ret;
}

@@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
size_t old_ea_inode_quota = 0;
unsigned int ea_ino;

-
#define header(x) ((struct ext4_xattr_header *)(x))

+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode)) {
+ error = PTR_ERR(ea_inode);
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
+
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;

@@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_JTR_NONE);
if (error)
goto cleanup;
+
lock_buffer(bs->bh);

if (header(s->base)->h_refcount == cpu_to_le32(1)) {
@@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
- true /* is_block */);
+ ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -2034,29 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}

- error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;

- if (i->value && s->here->e_value_inum) {
- /*
- * A ref count on ea_inode has been taken as part of the call to
- * ext4_xattr_set_entry() above. We would like to drop this
- * extra ref but we have to wait until the xattr block is
- * initialized and has its own ref count on the ea_inode.
- */
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &ea_inode);
- if (error) {
- ea_inode = NULL;
- goto cleanup;
- }
- }
-
inserted:
if (!IS_LAST_ENTRY(s->first)) {
new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
@@ -2277,14 +2246,40 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_search *s = &is->s;
+ struct inode *ea_inode = NULL;
int error;

if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;

- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode))
+ return PTR_ERR(ea_inode);
+ }
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ false /* is_block */);
+ if (error) {
+ if (ea_inode) {
+ int error2;
+
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
+
+ /* If there was an error, revert the quota charge. */
+ if (error)
+ ext4_xattr_inode_free_quota(inode, ea_inode,
+ i_size_read(ea_inode));
+ iput(ea_inode);
+ }
return error;
+ }
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
@@ -2293,6 +2288,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header->h_magic = cpu_to_le32(0);
ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
}
+ iput(ea_inode);
return 0;
}

--
2.35.3


2024-02-09 11:31:30

by Jan Kara

[permalink] [raw]
Subject: [PATCH 3/3] ext4: Drop duplicate ea_inode handling in ext4_xattr_block_set()

ext4_xattr_block_set() drops ea_inode reference in two places. Handling
it just under the 'cleanup' label is enough so drop the second
occurence.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/xattr.c | 11 -----------
1 file changed, 11 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e7e1ffff8eba..040a40908f39 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2127,17 +2127,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ENTRY(header(s->base)+1));
if (error)
goto getblk_failed;
- if (ea_inode) {
- /* Drop the extra ref on ea_inode. */
- error = ext4_xattr_inode_dec_ref(handle,
- ea_inode);
- if (error)
- ext4_warning_inode(ea_inode,
- "dec ref error=%d",
- error);
- iput(ea_inode);
- ea_inode = NULL;
- }

lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, sb,
--
2.35.3


2024-02-09 11:45:45

by Jan Kara

[permalink] [raw]
Subject: [PATCH 1/3] ext4: Fold quota accounting into ext4_xattr_inode_lookup_create()

When allocating EA inode, quota accounting is done just before
ext4_xattr_inode_lookup_create(). Logically these two operations belong
together so just fold quota accounting into
ext4_xattr_inode_lookup_create(). We also make
ext4_xattr_inode_lookup_create() return the looked up / created inode to
convert the function to a more standard calling convention.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/xattr.c | 50 ++++++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 82dc5e673d5c..146690c10c73 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1565,46 +1565,49 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
/*
* Add value of the EA in an inode.
*/
-static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
- const void *value, size_t value_len,
- struct inode **ret_inode)
+static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
+ struct inode *inode, const void *value, size_t value_len)
{
struct inode *ea_inode;
u32 hash;
int err;

+ /* Account inode & space to quota even if sharing... */
+ err = ext4_xattr_inode_alloc_quota(inode, value_len);
+ if (err)
+ return ERR_PTR(err);
+
hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
if (ea_inode) {
err = ext4_xattr_inode_inc_ref(handle, ea_inode);
- if (err) {
- iput(ea_inode);
- return err;
- }
-
- *ret_inode = ea_inode;
- return 0;
+ if (err)
+ goto out_err;
+ return ea_inode;
}

/* Create an inode for the EA value */
ea_inode = ext4_xattr_inode_create(handle, inode, hash);
- if (IS_ERR(ea_inode))
- return PTR_ERR(ea_inode);
+ if (IS_ERR(ea_inode)) {
+ ext4_xattr_inode_free_quota(inode, NULL, value_len);
+ return ea_inode;
+ }

err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
if (err) {
if (ext4_xattr_inode_dec_ref(handle, ea_inode))
ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
- iput(ea_inode);
- return err;
+ goto out_err;
}

if (EA_INODE_CACHE(inode))
mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
ea_inode->i_ino, true /* reusable */);
-
- *ret_inode = ea_inode;
- return 0;
+ return ea_inode;
+out_err:
+ iput(ea_inode);
+ ext4_xattr_inode_free_quota(inode, NULL, value_len);
+ return ERR_PTR(err);
}

/*
@@ -1712,16 +1715,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
if (i->value && in_inode) {
WARN_ON_ONCE(!i->value_len);

- ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
- if (ret)
- goto out;
-
- ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
- i->value_len,
- &new_ea_inode);
- if (ret) {
+ new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(new_ea_inode)) {
+ ret = PTR_ERR(new_ea_inode);
new_ea_inode = NULL;
- ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
goto out;
}
}
--
2.35.3


2024-02-22 16:03:31

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 0/3] ext4: Create EA inodes outside of buffer lock


On Fri, 09 Feb 2024 12:20:58 +0100, Jan Kara wrote:
> ext4_xattr_set_entry() creates new EA inodes while holding buffer lock on the
> external xattr block. This is problematic as it nests all the allocation
> locking (which acquires locks on other buffers) under the buffer lock. This can
> even deadlock when the filesystem is corrupted and e.g. quota file is setup to
> contain xattr block as data block as syzbot has spotted. This series moves
> the allocation of EA inode to happen outside of the buffer lock which is
> generally more sensible and also fixes the syzbot reproducer.
>
> [...]

Applied, thanks!

[1/3] ext4: Fold quota accounting into ext4_xattr_inode_lookup_create()
commit: 8208c41c43ad5e9b63dce6c45a73e326109ca658
[2/3] ext4: Do not create EA inode under buffer lock
commit: ea554578483b351693923be42bcff139c8023552
[3/3] ext4: Drop duplicate ea_inode handling in ext4_xattr_block_set()
commit: 72e38f8615148d118ffb82f9055068f8c491c385

Best regards,
--
Theodore Ts'o <[email protected]>

2024-02-29 16:19:47

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 2/3] ext4: Do not create EA inode under buffer lock

On Fri, Feb 09, 2024 at 12:21:00PM +0100, Jan Kara wrote:
> ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
> on the external xattr block. This is problematic as it nests all the
> allocation locking (which acquires locks on other buffers) under the
> buffer lock. This can even deadlock when the filesystem is corrupted and
> e.g. quota file is setup to contain xattr block as data block. Move the
> allocation of EA inode out of ext4_xattr_set_entry() into the callers.
>
> Reported-by: [email protected]
> Signed-off-by: Jan Kara <[email protected]>

In my testing I've found that this is causing a regression for
ext4/026 in the encrypt configuration. This can be replicated using
"kvm-xfstests -c encrypt ext4/026. Logs follow below.

I'll try to take a closer look, but I may end up deciding to drop this
patch or possible the whole patch series until we can figure out
what's going on.

- Ted

ext4/026 1s ... [10:51:57][ 3.111475] run fstests ext4/026 at 2024-02-29 10:51:57
EXT4-fs (vdc): Test dummy encryption mode enabled
EXT4-fs (vdc): Test dummy encryption mode enabled
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
------------[ cut here ]------------
EA inode 18 ref_count=-1
WARNING: CPU: 0 PID: 2391 at fs/ext4/xattr.c:1064 ext4_xattr_inode_update_ref+0x1c0/0x230
CPU: 0 PID: 2391 Comm: setfattr Not tainted 6.8.0-rc3-xfstests-00021-gf7528aea5d49 #320
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:ext4_xattr_inode_update_ref+0x1c0/0x230
Code: 0b e9 21 ff ff ff 80 3d 13 47 5a 01 00 0f 85 14 ff ff ff 48 8b 73 40 48 c7 c7 a6 8e 5d 82 c6 05 fb 46 5a 01 01 e8 50 40 c1 ff <0f> 0b e9 f6 fe ff ff 80 3d e7 46 5a 01 00 0f 85 5d ff ff ff 48 8b
RSP: 0018:ffffc900032cb980 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff8880043438a8 RCX: 0000000000000027
RDX: ffff88807dc1c848 RSI: 0000000000000001 RDI: ffff88807dc1c840
RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffff82860e00
R10: ffffc900032cb828 R11: ffffffff828d0e48 R12: ffff888007c93150
R13: ffff888004343948 R14: 00000000ffffffff R15: ffff8880043438a8
FS: 00007fab1e02b740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055e0e4fd2000 CR3: 000000000a0a6002 CR4: 0000000000770ef0
PKRU: 55555554
Call Trace:
<TASK>
? ext4_xattr_inode_update_ref+0x1c0/0x230
? __warn+0x7c/0x130
? ext4_xattr_inode_update_ref+0x1c0/0x230
? report_bug+0x173/0x1d0
? handle_bug+0x3a/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? ext4_xattr_inode_update_ref+0x1c0/0x230
ext4_xattr_inode_dec_ref_all+0x166/0x330
ext4_xattr_release_block+0x29e/0x300
ext4_xattr_block_set+0x795/0xc70
ext4_xattr_set_handle+0x468/0x680
ext4_xattr_set+0x88/0x160
__vfs_setxattr+0x96/0xd0
__vfs_setxattr_noperm+0x79/0x1d0
vfs_setxattr+0x9f/0x180
setxattr+0x9e/0xc0
path_setxattr+0xc9/0xf0
__x64_sys_setxattr+0x2b/0x40
do_syscall_64+0x52/0x120
entry_SYSCALL_64_after_hwframe+0x6e/0x76
RIP: 0033:0x7fab1e12f4f9
Code: 08 89 e8 5b 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d d7 08 0d 00 f7 d8 64 89 01 48
RSP: 002b:00007fffe7694618 EFLAGS: 00000206 ORIG_RAX: 00000000000000bc
RAX: ffffffffffffffda RBX: 000055e0e4fc3340 RCX: 00007fab1e12f4f9
RDX: 000055e0e4fc3340 RSI: 00007fffe7695a22 RDI: 00007fffe76a5a96
RBP: 00007fffe76a5a96 R08: 0000000000000000 R09: 00007fab1e247020
R10: 0000000000010000 R11: 0000000000000206 R12: 00007fffe7695a22
R13: 000055e0e3e8008c R14: 000055e0e3e82100 R15: 00007fab1e247020
</TASK>
---[ end trace 0000000000000000 ]---
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs error (device vdc): ext4_xattr_inode_iget:443: comm getfattr: error while reading EA inode 18 err=-116
EXT4-fs (vdc): Test dummy encryption mode enabled
EXT4-fs (vdc): warning: mounting fs with errors, running e2fsck is recommended

2024-03-14 18:12:21

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/3] ext4: Do not create EA inode under buffer lock

On Thu 29-02-24 10:59:17, Theodore Ts'o wrote:
> On Fri, Feb 09, 2024 at 12:21:00PM +0100, Jan Kara wrote:
> > ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
> > on the external xattr block. This is problematic as it nests all the
> > allocation locking (which acquires locks on other buffers) under the
> > buffer lock. This can even deadlock when the filesystem is corrupted and
> > e.g. quota file is setup to contain xattr block as data block. Move the
> > allocation of EA inode out of ext4_xattr_set_entry() into the callers.
> >
> > Reported-by: [email protected]
> > Signed-off-by: Jan Kara <[email protected]>
>
> In my testing I've found that this is causing a regression for
> ext4/026 in the encrypt configuration. This can be replicated using
> "kvm-xfstests -c encrypt ext4/026. Logs follow below.
>
> I'll try to take a closer look, but I may end up deciding to drop this
> patch or possible the whole patch series until we can figure out
> what's going on.

OK, I've found the problem. I'll rebase patches on top of rc1 when it
happens and send fixed version. Thanks for catching this!

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2024-03-21 16:27:18

by Jan Kara

[permalink] [raw]
Subject: [PATCH 2/2] ext4: Do not create EA inode under buffer lock

ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
on the external xattr block. This is problematic as it nests all the
allocation locking (which acquires locks on other buffers) under the
buffer lock. This can even deadlock when the filesystem is corrupted and
e.g. quota file is setup to contain xattr block as data block. Move the
allocation of EA inode out of ext4_xattr_set_entry() into the callers.

Reported-by: [email protected]
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/xattr.c | 113 +++++++++++++++++++++++-------------------------
1 file changed, 53 insertions(+), 60 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 146690c10c73..04f90df8dbae 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1619,6 +1619,7 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
struct ext4_xattr_search *s,
handle_t *handle, struct inode *inode,
+ struct inode *new_ea_inode,
bool is_block)
{
struct ext4_xattr_entry *last, *next;
@@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
- struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;

@@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
old_ea_inode = NULL;
goto out;
}
- }
- if (i->value && in_inode) {
- WARN_ON_ONCE(!i->value_len);
-
- new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
- i->value, i->value_len);
- if (IS_ERR(new_ea_inode)) {
- ret = PTR_ERR(new_ea_inode);
- new_ea_inode = NULL;
- goto out;
- }
- }

- if (old_ea_inode) {
/* We are ready to release ref count on the old_ea_inode. */
ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
- if (ret) {
- /* Release newly required ref count on new_ea_inode. */
- if (new_ea_inode) {
- int err;
-
- err = ext4_xattr_inode_dec_ref(handle,
- new_ea_inode);
- if (err)
- ext4_warning_inode(new_ea_inode,
- "dec ref new_ea_inode err=%d",
- err);
- ext4_xattr_inode_free_quota(inode, new_ea_inode,
- i->value_len);
- }
+ if (ret)
goto out;
- }

ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
@@ -1866,7 +1839,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
ret = 0;
out:
iput(old_ea_inode);
- iput(new_ea_inode);
return ret;
}

@@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
size_t old_ea_inode_quota = 0;
unsigned int ea_ino;

-
#define header(x) ((struct ext4_xattr_header *)(x))

+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode)) {
+ error = PTR_ERR(ea_inode);
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
+
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;

@@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_JTR_NONE);
if (error)
goto cleanup;
+
lock_buffer(bs->bh);

if (header(s->base)->h_refcount == cpu_to_le32(1)) {
@@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
- true /* is_block */);
+ ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -2034,29 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}

- error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;

- if (i->value && s->here->e_value_inum) {
- /*
- * A ref count on ea_inode has been taken as part of the call to
- * ext4_xattr_set_entry() above. We would like to drop this
- * extra ref but we have to wait until the xattr block is
- * initialized and has its own ref count on the ea_inode.
- */
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &ea_inode);
- if (error) {
- ea_inode = NULL;
- goto cleanup;
- }
- }
-
inserted:
if (!IS_LAST_ENTRY(s->first)) {
new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
@@ -2209,17 +2178,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,

cleanup:
if (ea_inode) {
- int error2;
-
- error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
- if (error2)
- ext4_warning_inode(ea_inode, "dec ref error=%d",
- error2);
+ if (error) {
+ int error2;

- /* If there was an error, revert the quota charge. */
- if (error)
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
+ }
iput(ea_inode);
}
if (ce)
@@ -2277,14 +2245,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_search *s = &is->s;
+ struct inode *ea_inode = NULL;
int error;

if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;

- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode))
+ return PTR_ERR(ea_inode);
+ }
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ false /* is_block */);
+ if (error) {
+ if (ea_inode) {
+ int error2;
+
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
+
+ ext4_xattr_inode_free_quota(inode, ea_inode,
+ i_size_read(ea_inode));
+ iput(ea_inode);
+ }
return error;
+ }
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
@@ -2293,6 +2285,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header->h_magic = cpu_to_le32(0);
ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
}
+ iput(ea_inode);
return 0;
}

--
2.35.3


2024-03-21 16:37:17

by Jan Kara

[permalink] [raw]
Subject: [PATCH 1/2] Revert "ext4: drop duplicate ea_inode handling in ext4_xattr_block_set()"

This reverts commit 7f48212678e91a057259b3e281701f7feb1ee397. We will
need the special cleanup handling once we move allocation of EA inode
outside of the buffer lock in the following patch.

Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/xattr.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b67a176bfcf9..146690c10c73 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2158,6 +2158,17 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ENTRY(header(s->base)+1));
if (error)
goto getblk_failed;
+ if (ea_inode) {
+ /* Drop the extra ref on ea_inode. */
+ error = ext4_xattr_inode_dec_ref(handle,
+ ea_inode);
+ if (error)
+ ext4_warning_inode(ea_inode,
+ "dec ref error=%d",
+ error);
+ iput(ea_inode);
+ ea_inode = NULL;
+ }

lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, sb,
--
2.35.3


2024-03-22 18:12:07

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: Do not create EA inode under buffer lock

On Mar 21, 2024, at 10:26 AM, Jan Kara <[email protected]> wrote:
>
> ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
> on the external xattr block. This is problematic as it nests all the
> allocation locking (which acquires locks on other buffers) under the
> buffer lock. This can even deadlock when the filesystem is corrupted and
> e.g. quota file is setup to contain xattr block as data block. Move the
> allocation of EA inode out of ext4_xattr_set_entry() into the callers.

This looks like it will allocate a new inode for every setxattr called,
even if the xattr is small and will likely fit inside the inode itself?
This would seem to add a lot of extra overhead for the 99% of cases when
an external inode is not needed.

The ext4_xattr_inode_lookup_create() call is not just doing an inode
bitmap lookup/allocation, it is also looking up the xattr hash in a hash
table, allocating and initializing an ext4 and VFS inode, quota, writing
it to the journal and using up journal credits (which need to be allocated
larger), etc. so it is by no means lightweight vs. memcpy() into the inode
buffer for most small xattrs.

It would be better to only preallocate the inode in the case when the
xattr size is large (say value_len > blocksize/2 or > blocksize * 3/4)
when there is a decent chance it will be needed. Otherwise, do not
preallocate the xattr inode before calling ext4_xattr_set_entry(), and
have it return -EAGAIN if there wasn't enough room in the inode or in
the external xattr block to hold the value, and the caller can jump back
to allocate the xattr inode and try again (once only), something like:


@@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,

+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode && i->value_len > i_blocksize(inode)/2) {
+alloc_inode:
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode)) {
+ error = PTR_ERR(ea_inode);
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
+
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;

@@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
- true /* is_block */);
+ ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
+ if (error == -EAGAIN && !ea_inode)
+ goto alloc_inode;
if (error == -EFSCORRUPTED)

Cheers, Andreas

> Reported-by: [email protected]
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext4/xattr.c | 113 +++++++++++++++++++++++-------------------------
> 1 file changed, 53 insertions(+), 60 deletions(-)
>
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 146690c10c73..04f90df8dbae 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -1619,6 +1619,7 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
> static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> struct ext4_xattr_search *s,
> handle_t *handle, struct inode *inode,
> + struct inode *new_ea_inode,
> bool is_block)
> {
> struct ext4_xattr_entry *last, *next;
> @@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> size_t min_offs = s->end - s->base, name_len = strlen(i->name);
> int in_inode = i->in_inode;
> struct inode *old_ea_inode = NULL;
> - struct inode *new_ea_inode = NULL;
> size_t old_size, new_size;
> int ret;
>
> @@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> old_ea_inode = NULL;
> goto out;
> }
> - }
> - if (i->value && in_inode) {
> - WARN_ON_ONCE(!i->value_len);
> -
> - new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
> - i->value, i->value_len);
> - if (IS_ERR(new_ea_inode)) {
> - ret = PTR_ERR(new_ea_inode);
> - new_ea_inode = NULL;
> - goto out;
> - }
> - }
>
> - if (old_ea_inode) {
> /* We are ready to release ref count on the old_ea_inode. */
> ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
> - if (ret) {
> - /* Release newly required ref count on new_ea_inode. */
> - if (new_ea_inode) {
> - int err;
> -
> - err = ext4_xattr_inode_dec_ref(handle,
> - new_ea_inode);
> - if (err)
> - ext4_warning_inode(new_ea_inode,
> - "dec ref new_ea_inode err=%d",
> - err);
> - ext4_xattr_inode_free_quota(inode, new_ea_inode,
> - i->value_len);
> - }
> + if (ret)
> goto out;
> - }
>
> ext4_xattr_inode_free_quota(inode, old_ea_inode,
> le32_to_cpu(here->e_value_size));
> @@ -1866,7 +1839,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> ret = 0;
> out:
> iput(old_ea_inode);
> - iput(new_ea_inode);
> return ret;
> }
>
> @@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> size_t old_ea_inode_quota = 0;
> unsigned int ea_ino;
>
> -
> #define header(x) ((struct ext4_xattr_header *)(x))
>
> + /* If we need EA inode, prepare it before locking the buffer */
> + if (i->value && i->in_inode) {
> + WARN_ON_ONCE(!i->value_len);
> +
> + ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
> + i->value, i->value_len);
> + if (IS_ERR(ea_inode)) {
> + error = PTR_ERR(ea_inode);
> + ea_inode = NULL;
> + goto cleanup;
> + }
> + }
> +
> if (s->base) {
> int offset = (char *)s->here - bs->bh->b_data;
>
> @@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> EXT4_JTR_NONE);
> if (error)
> goto cleanup;
> +
> lock_buffer(bs->bh);
>
> if (header(s->base)->h_refcount == cpu_to_le32(1)) {
> @@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> }
> ea_bdebug(bs->bh, "modifying in-place");
> error = ext4_xattr_set_entry(i, s, handle, inode,
> - true /* is_block */);
> + ea_inode, true /* is_block */);
> ext4_xattr_block_csum_set(inode, bs->bh);
> unlock_buffer(bs->bh);
> if (error == -EFSCORRUPTED)
> @@ -2034,29 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> s->end = s->base + sb->s_blocksize;
> }
>
> - error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
> + error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
> + true /* is_block */);
> if (error == -EFSCORRUPTED)
> goto bad_block;
> if (error)
> goto cleanup;
>
> - if (i->value && s->here->e_value_inum) {
> - /*
> - * A ref count on ea_inode has been taken as part of the call to
> - * ext4_xattr_set_entry() above. We would like to drop this
> - * extra ref but we have to wait until the xattr block is
> - * initialized and has its own ref count on the ea_inode.
> - */
> - ea_ino = le32_to_cpu(s->here->e_value_inum);
> - error = ext4_xattr_inode_iget(inode, ea_ino,
> - le32_to_cpu(s->here->e_hash),
> - &ea_inode);
> - if (error) {
> - ea_inode = NULL;
> - goto cleanup;
> - }
> - }
> -
> inserted:
> if (!IS_LAST_ENTRY(s->first)) {
> new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
> @@ -2209,17 +2178,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>
> cleanup:
> if (ea_inode) {
> - int error2;
> -
> - error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
> - if (error2)
> - ext4_warning_inode(ea_inode, "dec ref error=%d",
> - error2);
> + if (error) {
> + int error2;
>
> - /* If there was an error, revert the quota charge. */
> - if (error)
> + error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
> + if (error2)
> + ext4_warning_inode(ea_inode, "dec ref error=%d",
> + error2);
> ext4_xattr_inode_free_quota(inode, ea_inode,
> i_size_read(ea_inode));
> + }
> iput(ea_inode);
> }
> if (ce)
> @@ -2277,14 +2245,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
> {
> struct ext4_xattr_ibody_header *header;
> struct ext4_xattr_search *s = &is->s;
> + struct inode *ea_inode = NULL;
> int error;
>
> if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
> return -ENOSPC;
>
> - error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
> - if (error)
> + /* If we need EA inode, prepare it before locking the buffer */
> + if (i->value && i->in_inode) {
> + WARN_ON_ONCE(!i->value_len);
> +
> + ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
> + i->value, i->value_len);
> + if (IS_ERR(ea_inode))
> + return PTR_ERR(ea_inode);
> + }
> + error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
> + false /* is_block */);
> + if (error) {
> + if (ea_inode) {
> + int error2;
> +
> + error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
> + if (error2)
> + ext4_warning_inode(ea_inode, "dec ref error=%d",
> + error2);
> +
> + ext4_xattr_inode_free_quota(inode, ea_inode,
> + i_size_read(ea_inode));
> + iput(ea_inode);
> + }
> return error;
> + }
> header = IHDR(inode, ext4_raw_inode(&is->iloc));
> if (!IS_LAST_ENTRY(s->first)) {
> header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
> @@ -2293,6 +2285,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
> header->h_magic = cpu_to_le32(0);
> ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
> }
> + iput(ea_inode);
> return 0;
> }
>
> --
> 2.35.3
>
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2024-03-25 19:03:45

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: Do not create EA inode under buffer lock

On Fri 22-03-24 12:06:16, Andreas Dilger wrote:
> On Mar 21, 2024, at 10:26 AM, Jan Kara <[email protected]> wrote:
> >
> > ext4_xattr_set_entry() creates new EA inodes while holding buffer lock
> > on the external xattr block. This is problematic as it nests all the
> > allocation locking (which acquires locks on other buffers) under the
> > buffer lock. This can even deadlock when the filesystem is corrupted and
> > e.g. quota file is setup to contain xattr block as data block. Move the
> > allocation of EA inode out of ext4_xattr_set_entry() into the callers.
>
> This looks like it will allocate a new inode for every setxattr called,
> even if the xattr is small and will likely fit inside the inode itself?
> This would seem to add a lot of extra overhead for the 99% of cases when
> an external inode is not needed.

This is not the case AFAICT. We call ext4_xattr_inode_lookup_create() only
in:

if (i->value && i->in_inode) {

so that means we've already decided we need to put the xattr value in the
EA inode. Note that ext4_xattr_set_handle() for smaller xattr value first
calls ext4_xattr_block_set() with i.in_inode == 0 and if that fails due to
ENOSPC, it sets i.in_inode = 1 and tries again.

So I think everything is fine.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR