2020-03-23 01:26:51

by Chao Yu

[permalink] [raw]
Subject: [PATCH v3] f2fs: fix potential .flags overflow on 32bit architecture

f2fs_inode_info.flags is an unsigned long variable, which has only
32 bits on 32-bit architectures. Since we introduced the FI_MMAP_FILE
flag (bit 32) when adding data compression support, accessing it may
touch memory beyond the end of the .flags field and corrupt the
.i_sem field, resulting in the deadlock below.

To fix this issue, let's expand .flags into an array that is large
enough to hold all current and new flags.

Call Trace:
__schedule+0x8d0/0x13fc
? mark_held_locks+0xac/0x100
schedule+0xcc/0x260
rwsem_down_write_slowpath+0x3ab/0x65d
down_write+0xc7/0xe0
f2fs_drop_nlink+0x3d/0x600 [f2fs]
f2fs_delete_inline_entry+0x300/0x440 [f2fs]
f2fs_delete_entry+0x3a1/0x7f0 [f2fs]
f2fs_unlink+0x500/0x790 [f2fs]
vfs_unlink+0x211/0x490
do_unlinkat+0x483/0x520
sys_unlink+0x4a/0x70
do_fast_syscall_32+0x12b/0x683
entry_SYSENTER_32+0xaa/0x102

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
v3:
- clean up with test_and_{set,clear}_bit suggested by Joe Perches
- clean up with BITS_TO_LONGS suggested by Ondřej Jirman
- avoid unneeded parameter type change in __mark_inode_dirty_flag()
fs/f2fs/f2fs.h | 112 ++++++++++++++++++++++++++----------------------
fs/f2fs/inode.c | 4 +-
2 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fcafa68212eb..a68f2301a1ff 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -682,6 +682,47 @@ enum {
MAX_GC_FAILURE
};

+/* used for f2fs_inode_info->flags */
+enum {
+ FI_NEW_INODE, /* indicate newly allocated inode */
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_AUTO_RECOVER, /* indicate inode is recoverable */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
+ FI_INC_LINK, /* need to increment i_nlink */
+ FI_ACL_MODE, /* indicate acl mode */
+ FI_NO_ALLOC, /* should not allocate any blocks */
+ FI_FREE_NID, /* free allocated nide */
+ FI_NO_EXTENT, /* not to use the extent cache */
+ FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used for ipu per file */
+ FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
+ FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
+ FI_DROP_CACHE, /* drop dirty page cache */
+ FI_DATA_EXIST, /* indicate data exists */
+ FI_INLINE_DOTS, /* indicate inline dot dentries */
+ FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
+ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_HOT_DATA, /* indicate file is hot */
+ FI_EXTRA_ATTR, /* indicate file has extra attribute */
+ FI_PROJ_INHERIT, /* indicate file inherits projectid */
+ FI_PIN_FILE, /* indicate file should not be gced */
+ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
+ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
+ FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
+ FI_MMAP_FILE, /* indicate file was mmapped */
+ FI_MAX, /* max flag, never be used */
+};
+
+/* f2fs_inode_info.flags array size */
+#define FI_ARRAY_SIZE (BITS_TO_LONGS(FI_MAX))
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
@@ -694,7 +735,7 @@ struct f2fs_inode_info {
umode_t i_acl_mode; /* keep file acl mode temporarily */

/* Use below internally in f2fs*/
- unsigned long flags; /* use to pass per-file flags */
+ unsigned long flags[FI_ARRAY_SIZE]; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
@@ -2531,43 +2572,6 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
return flags & F2FS_OTHER_FLMASK;
}

-/* used for f2fs_inode_info->flags */
-enum {
- FI_NEW_INODE, /* indicate newly allocated inode */
- FI_DIRTY_INODE, /* indicate inode is dirty or not */
- FI_AUTO_RECOVER, /* indicate inode is recoverable */
- FI_DIRTY_DIR, /* indicate directory has dirty pages */
- FI_INC_LINK, /* need to increment i_nlink */
- FI_ACL_MODE, /* indicate acl mode */
- FI_NO_ALLOC, /* should not allocate any blocks */
- FI_FREE_NID, /* free allocated nide */
- FI_NO_EXTENT, /* not to use the extent cache */
- FI_INLINE_XATTR, /* used for inline xattr */
- FI_INLINE_DATA, /* used for inline data*/
- FI_INLINE_DENTRY, /* used for inline dentry */
- FI_APPEND_WRITE, /* inode has appended data */
- FI_UPDATE_WRITE, /* inode has in-place-update data */
- FI_NEED_IPU, /* used for ipu per file */
- FI_ATOMIC_FILE, /* indicate atomic file */
- FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
- FI_VOLATILE_FILE, /* indicate volatile file */
- FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
- FI_DROP_CACHE, /* drop dirty page cache */
- FI_DATA_EXIST, /* indicate data exists */
- FI_INLINE_DOTS, /* indicate inline dot dentries */
- FI_DO_DEFRAG, /* indicate defragment is running */
- FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
- FI_HOT_DATA, /* indicate file is hot */
- FI_EXTRA_ATTR, /* indicate file has extra attribute */
- FI_PROJ_INHERIT, /* indicate file inherits projectid */
- FI_PIN_FILE, /* indicate file should not be gced */
- FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
- FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
- FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
- FI_MMAP_FILE, /* indicate file was mmapped */
-};
-
static inline void __mark_inode_dirty_flag(struct inode *inode,
int flag, bool set)
{
@@ -2586,22 +2590,28 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
}
}

+static inline void __set_inode_flag(struct inode *inode, int flag)
+{
+ test_and_set_bit(flag % BITS_PER_LONG,
+ &F2FS_I(inode)->flags[BIT_WORD(flag)]);
+}
+
static inline void set_inode_flag(struct inode *inode, int flag)
{
- if (!test_bit(flag, &F2FS_I(inode)->flags))
- set_bit(flag, &F2FS_I(inode)->flags);
+ __set_inode_flag(inode, flag);
__mark_inode_dirty_flag(inode, flag, true);
}

static inline int is_inode_flag_set(struct inode *inode, int flag)
{
- return test_bit(flag, &F2FS_I(inode)->flags);
+ return test_bit(flag % BITS_PER_LONG,
+ &F2FS_I(inode)->flags[BIT_WORD(flag)]);
}

static inline void clear_inode_flag(struct inode *inode, int flag)
{
- if (test_bit(flag, &F2FS_I(inode)->flags))
- clear_bit(flag, &F2FS_I(inode)->flags);
+ test_and_clear_bit(flag % BITS_PER_LONG,
+ &F2FS_I(inode)->flags[BIT_WORD(flag)]);
__mark_inode_dirty_flag(inode, flag, false);
}

@@ -2689,22 +2699,20 @@ static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino)

static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
{
- struct f2fs_inode_info *fi = F2FS_I(inode);
-
if (ri->i_inline & F2FS_INLINE_XATTR)
- set_bit(FI_INLINE_XATTR, &fi->flags);
+ __set_inode_flag(inode, FI_INLINE_XATTR);
if (ri->i_inline & F2FS_INLINE_DATA)
- set_bit(FI_INLINE_DATA, &fi->flags);
+ __set_inode_flag(inode, FI_INLINE_DATA);
if (ri->i_inline & F2FS_INLINE_DENTRY)
- set_bit(FI_INLINE_DENTRY, &fi->flags);
+ __set_inode_flag(inode, FI_INLINE_DENTRY);
if (ri->i_inline & F2FS_DATA_EXIST)
- set_bit(FI_DATA_EXIST, &fi->flags);
+ __set_inode_flag(inode, FI_DATA_EXIST);
if (ri->i_inline & F2FS_INLINE_DOTS)
- set_bit(FI_INLINE_DOTS, &fi->flags);
+ __set_inode_flag(inode, FI_INLINE_DOTS);
if (ri->i_inline & F2FS_EXTRA_ATTR)
- set_bit(FI_EXTRA_ATTR, &fi->flags);
+ __set_inode_flag(inode, FI_EXTRA_ATTR);
if (ri->i_inline & F2FS_PIN_FILE)
- set_bit(FI_PIN_FILE, &fi->flags);
+ __set_inode_flag(inode, FI_PIN_FILE);
}

static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 44e08bf2e2b4..5e441fdd4858 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -328,6 +328,7 @@ static int do_read_inode(struct inode *inode)
struct f2fs_inode *ri;
projid_t i_projid;
int err;
+ int i;

/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
@@ -362,7 +363,8 @@ static int do_read_inode(struct inode *inode)
fi->i_flags = le32_to_cpu(ri->i_flags);
if (S_ISREG(inode->i_mode))
fi->i_flags &= ~F2FS_PROJINHERIT_FL;
- fi->flags = 0;
+ for (i = 0; i < FI_ARRAY_SIZE; i++)
+ fi->flags[i] = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
--
2.18.0.rc1


2020-03-23 01:52:43

by Ondřej Jirman

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: fix potential .flags overflow on 32bit architecture

Hello Chao Yu,

On Mon, Mar 23, 2020 at 09:25:19AM +0800, Chao Yu wrote:
> [snip]
>
> +static inline void __set_inode_flag(struct inode *inode, int flag)
> +{
> + test_and_set_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

This can simply be:

test_and_set_bit(flag, F2FS_I(inode)->flags);

all of these bitmap manipulation functions already will do the
right thing to access the correct location in the flags array:

https://elixir.bootlin.com/linux/latest/source/include/asm-generic/bitops/atomic.h#L32

see BIT_MASK and BIT_WORD use in that function.

> +}
> +
> static inline void set_inode_flag(struct inode *inode, int flag)
> {
> - if (!test_bit(flag, &F2FS_I(inode)->flags))
> - set_bit(flag, &F2FS_I(inode)->flags);
> + __set_inode_flag(inode, flag);
> __mark_inode_dirty_flag(inode, flag, true);
> }
>
> static inline int is_inode_flag_set(struct inode *inode, int flag)
> {
> - return test_bit(flag, &F2FS_I(inode)->flags);
> + return test_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

ditto

> }
>
> static inline void clear_inode_flag(struct inode *inode, int flag)
> {
> - if (test_bit(flag, &F2FS_I(inode)->flags))
> - clear_bit(flag, &F2FS_I(inode)->flags);
> + test_and_clear_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

ditto

I'm going to test the patch. It looks like this was really
the root cause of all those locking issues I was seeing on my
32-bit tablet. It seems to explain why my 64-bit systems were
not affected, and why reverting compression fixed it too.
Great job figuring this out.

I'll let you know soon.

thank you and regards,
o.

> __mark_inode_dirty_flag(inode, flag, false);
> }
>

2020-03-23 02:05:00

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: fix potential .flags overflow on 32bit architecture

On Mon, 2020-03-23 at 09:25 +0800, Chao Yu wrote:
> f2fs_inode_info.flags is unsigned long variable, it has 32 bits
> in 32bit architecture, since we introduced FI_MMAP_FILE flag
> when we support data compression, we may access memory cross
> the border of .flags field, corrupting .i_sem field, result in
> below deadlock.
>
> To fix this issue, let's expand .flags as an array to grab enough
> space to store new flags.
[]
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
[]
> @@ -2586,22 +2590,28 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
> }
> }
>
> +static inline void __set_inode_flag(struct inode *inode, int flag)
> +{
> + test_and_set_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

I believe this should just use

test_and_set_bit(flag, F2FS_I(inode)->flags);

> static inline int is_inode_flag_set(struct inode *inode, int flag)
> {
> - return test_bit(flag, &F2FS_I(inode)->flags);
> + return test_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

here too.

test_bit(flag, F2FS_I(inode)->flags);

> static inline void clear_inode_flag(struct inode *inode, int flag)
> {
> - if (test_bit(flag, &F2FS_I(inode)->flags))
> - clear_bit(flag, &F2FS_I(inode)->flags);
> + test_and_clear_bit(flag % BITS_PER_LONG,
> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);

and here.

I also don't know why these functions are used at all.


2020-03-23 02:13:11

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: fix potential .flags overflow on 32bit architecture

Hello Ondřej,

On 2020/3/23 9:50, Ondřej Jirman wrote:
> Hello Chao Yu,
>
> On Mon, Mar 23, 2020 at 09:25:19AM +0800, Chao Yu wrote:
>> [snip]
>>
>> +static inline void __set_inode_flag(struct inode *inode, int flag)
>> +{
>> + test_and_set_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> This can simply be:
>
> test_and_set_bit(flag, F2FS_I(inode)->flags);
>
> all of these bitmap manipulation functions already will do the
> right thing to access the correct location in the flags array:
>
> https://elixir.bootlin.com/linux/latest/source/include/asm-generic/bitops/atomic.h#L32
>
> see BIT_MASK and BIT_WORD use in that function.

Oops, most f2fs bitmap checks use the same form; I missed this case.

>
>> +}
>> +
>> static inline void set_inode_flag(struct inode *inode, int flag)
>> {
>> - if (!test_bit(flag, &F2FS_I(inode)->flags))
>> - set_bit(flag, &F2FS_I(inode)->flags);
>> + __set_inode_flag(inode, flag);
>> __mark_inode_dirty_flag(inode, flag, true);
>> }
>>
>> static inline int is_inode_flag_set(struct inode *inode, int flag)
>> {
>> - return test_bit(flag, &F2FS_I(inode)->flags);
>> + return test_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> ditto
>
>> }
>>
>> static inline void clear_inode_flag(struct inode *inode, int flag)
>> {
>> - if (test_bit(flag, &F2FS_I(inode)->flags))
>> - clear_bit(flag, &F2FS_I(inode)->flags);
>> + test_and_clear_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> ditto
>
> I'm going to test the patch. It looks like that this was really
> the root cause of all those locking issues I was seeing on my
> 32-bit tablet. It seems to explain why my 64-bit systems were
> not affected, and why reverting compression fixed it too.
> Great job figuring this out.
>
> I'll let you know soon.

Great, hoping this patch can fix the issue this time.

Thanks anyway for your support in troubleshooting this issue.

Thanks,

>
> thank you and regards,
> o.
>
>> __mark_inode_dirty_flag(inode, flag, false);
>> }
>>
> .
>

2020-03-23 02:21:12

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: fix potential .flags overflow on 32bit architecture

On 2020/3/23 10:00, Joe Perches wrote:
> On Mon, 2020-03-23 at 09:25 +0800, Chao Yu wrote:
>> f2fs_inode_info.flags is unsigned long variable, it has 32 bits
>> in 32bit architecture, since we introduced FI_MMAP_FILE flag
>> when we support data compression, we may access memory cross
>> the border of .flags field, corrupting .i_sem field, result in
>> below deadlock.
>>
>> To fix this issue, let's expand .flags as an array to grab enough
>> space to store new flags.
> []
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> []
>> @@ -2586,22 +2590,28 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
>> }
>> }
>>
>> +static inline void __set_inode_flag(struct inode *inode, int flag)
>> +{
>> + test_and_set_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> I believe this should just use
>
> test_and_set_bit(flag, F2FS_I(inode)->flags);
>
>> static inline int is_inode_flag_set(struct inode *inode, int flag)
>> {
>> - return test_bit(flag, &F2FS_I(inode)->flags);
>> + return test_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> here too.
>
> test_bit(flag, F2FS_I(inode)->flags);
>
>> static inline void clear_inode_flag(struct inode *inode, int flag)
>> {
>> - if (test_bit(flag, &F2FS_I(inode)->flags))
>> - clear_bit(flag, &F2FS_I(inode)->flags);
>> + test_and_clear_bit(flag % BITS_PER_LONG,
>> + &F2FS_I(inode)->flags[BIT_WORD(flag)]);
>
> and here.

My bad, Ondřej Jirman also reminded me of this issue; I will fix it soon.

Thanks,

>
> I also don't know why these functions are used at all.
>
>
> .
>