2024-03-20 08:46:47

by Eugen Hristev

[permalink] [raw]
Subject: [PATCH v14 0/9] Cache insensitive cleanup for ext4/f2fs

Hello,

I am trying to respin the series here :
https://www.spinics.net/lists/linux-ext4/msg85081.html

I resent some of the v9 patches and got some reviews from Gabriel,
I did changes as requested and here is v14.

Changes in v14:
- fix wrong kfree unchecked call
- changed the return code in 3/8

Changes in v13:
- removed stray wrong line in 2/8
- removed old R-b as it's too long since they were given
- removed check for null buff in 2/8
- added new patch `f2fs: Log error when lookup of encoded dentry fails` as suggested
- rebased on unicode.git for-next branch

Changes in v12:
- revert to v10 comparison with propagating the error code from utf comparison

Changes in v11:
- revert to the original v9 implementation for the comparison helper.

Changes in v10:
- reworked a bit the comparison helper to improve performance by
first performing the exact lookup.


* Original commit letter

The case-insensitive implementations in f2fs and ext4 have quite a bit
of duplicated code. This series simplifies the ext4 version, with the
goal of extracting ext4_ci_compare into a helper library that can be
used by both filesystems. It also reduces the clutter from many
codeguards for CONFIG_UNICODE; as requested by Linus, they are part of
the codeflow now.

While there, I noticed we can leverage the utf8 functions to detect
encoded names that are corrupted in the filesystem. Therefore, it also
adds an ext4 error on that scenario, to mark the filesystem as
corrupted.

This series survived passes of xfstests -g quick.

Eugen Hristev (1):
f2fs: Log error when lookup of encoded dentry fails

Gabriel Krisman Bertazi (8):
ext4: Simplify the handling of cached insensitive names
f2fs: Simplify the handling of cached insensitive names
libfs: Introduce case-insensitive string comparison helper
ext4: Reuse generic_ci_match for ci comparisons
f2fs: Reuse generic_ci_match for ci comparisons
ext4: Log error when lookup of encoded dentry fails
ext4: Move CONFIG_UNICODE defguards into the code flow
f2fs: Move CONFIG_UNICODE defguards into the code flow

fs/ext4/crypto.c | 19 ++-----
fs/ext4/ext4.h | 35 +++++++-----
fs/ext4/namei.c | 129 ++++++++++++++++-----------------------------
fs/ext4/super.c | 4 +-
fs/f2fs/dir.c | 114 ++++++++++++++-------------------------
fs/f2fs/f2fs.h | 16 +++++-
fs/f2fs/namei.c | 10 ++--
fs/f2fs/recovery.c | 5 +-
fs/f2fs/super.c | 8 +--
fs/libfs.c | 77 +++++++++++++++++++++++++++
include/linux/fs.h | 4 ++
11 files changed, 217 insertions(+), 204 deletions(-)

--
2.34.1



2024-03-20 08:47:09

by Eugen Hristev

[permalink] [raw]
Subject: [PATCH v14 1/9] ext4: Simplify the handling of cached insensitive names

From: Gabriel Krisman Bertazi <[email protected]>

Keeping it as qstr avoids the unnecessary conversion in ext4_match

Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
[[email protected]: port to 6.8-rc3]
Signed-off-by: Eugen Hristev <[email protected]>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/namei.c | 23 +++++++++++------------
2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a5d784872303..4061d11b9763 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2506,7 +2506,7 @@ struct ext4_filename {
struct fscrypt_str crypto_buf;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5e4f65c14dfb..b96983a4c185 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1445,7 +1445,8 @@ static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct ext4_filename *name)
{
- struct fscrypt_str *cf_name = &name->cf_name;
+ struct qstr *cf_name = &name->cf_name;
+ unsigned char *buf;
struct dx_hash_info *hinfo = &name->hinfo;
int len;

@@ -1455,18 +1456,18 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
return 0;
}

- cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
- if (!cf_name->name)
+ buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!buf)
return -ENOMEM;

- len = utf8_casefold(dir->i_sb->s_encoding,
- iname, cf_name->name,
- EXT4_NAME_LEN);
+ len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN);
if (len <= 0) {
- kfree(cf_name->name);
- cf_name->name = NULL;
+ kfree(buf);
+ buf = NULL;
}
+ cf_name->name = buf;
cf_name->len = (unsigned) len;
+
if (!IS_ENCRYPTED(dir))
return 0;

@@ -1503,8 +1504,6 @@ static bool ext4_match(struct inode *parent,
if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
- struct qstr cf = {.name = fname->cf_name.name,
- .len = fname->cf_name.len};
if (IS_ENCRYPTED(parent)) {
if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
fname->hinfo.minor_hash !=
@@ -1513,8 +1512,8 @@ static bool ext4_match(struct inode *parent,
return false;
}
}
- return !ext4_ci_compare(parent, &cf, de->name,
- de->name_len, true);
+ return !ext4_ci_compare(parent, &fname->cf_name,
+ de->name, de->name_len, true);
}
return !ext4_ci_compare(parent, fname->usr_fname, de->name,
de->name_len, false);
--
2.34.1


2024-03-20 08:47:38

by Eugen Hristev

[permalink] [raw]
Subject: [PATCH v14 2/9] f2fs: Simplify the handling of cached insensitive names

From: Gabriel Krisman Bertazi <[email protected]>

Keeping it as qstr avoids the unnecessary conversion in f2fs_match

Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
[[email protected]: port to 6.8-rc3 and minor changes]
Signed-off-by: Eugen Hristev <[email protected]>
---
fs/f2fs/dir.c | 53 ++++++++++++++++++++++++++--------------------
fs/f2fs/f2fs.h | 16 +++++++++++++-
fs/f2fs/recovery.c | 5 +----
3 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 042593aed1ec..3b454f355f15 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}

+#if IS_ENABLED(CONFIG_UNICODE)
/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
+ unsigned char *buf;
+ int len;

if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
- fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
- GFP_NOFS, false, F2FS_SB(sb));
- if (!fname->cf_name.name)
+ buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+ GFP_NOFS, false, F2FS_SB(sb));
+ if (!buf)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sb->s_encoding,
- fname->usr_fname,
- fname->cf_name.name,
- F2FS_NAME_LEN);
- if ((int)fname->cf_name.len <= 0) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
+
+ len = utf8_casefold(sb->s_encoding, fname->usr_fname,
+ buf, F2FS_NAME_LEN);
+ if (len <= 0) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
+ return 0;
}
+ fname->cf_name.name = buf;
+ fname->cf_name.len = len;
}
-#endif
+
return 0;
}

+void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+ unsigned char *buf = (unsigned char *)fname->cf_name.name;
+
+ if (buf) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
+ fname->cf_name.name = NULL;
+ }
+}
+#endif /* CONFIG_UNICODE */
+
static int __f2fs_setup_filename(const struct inode *dir,
const struct fscrypt_name *crypt_name,
struct f2fs_filename *fname)
@@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname)
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
- }
-#endif
+ f2fs_free_casefolded_name(fname);
}

static unsigned long dir_block_index(unsigned int level,
@@ -235,11 +244,9 @@ static inline int f2fs_match_name(const struct inode *dir,
struct fscrypt_name f;

#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
-
- return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
- }
+ if (fname->cf_name.name)
+ return f2fs_match_ci_name(dir, &fname->cf_name,
+ de_name, de_name_len);
#endif
f.usr_fname = fname->usr_fname;
f.disk_name = fname->disk_name;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..4f68c58718fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -524,7 +524,7 @@ struct f2fs_filename {
* internal operation where usr_fname is also NULL. In all these cases
* we fall back to treating the name as an opaque byte sequence.
*/
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};

@@ -3528,8 +3528,22 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
/*
* dir.c
*/
+#if IS_ENABLED(CONFIG_UNICODE)
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname);
+void f2fs_free_casefolded_name(struct f2fs_filename *fname);
+#else
+static inline int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+ return 0;
+}
+
+static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+}
+#endif /* CONFIG_UNICODE */
+
int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct f2fs_filename *fname);
int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index d0f24ccbd1ac..7e53abf6d216 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -153,11 +153,8 @@ static int init_recovered_filename(const struct inode *dir,
if (err)
return err;
f2fs_hash_filename(dir, fname);
-#if IS_ENABLED(CONFIG_UNICODE)
/* Case-sensitive match is fine for recovery */
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ f2fs_free_casefolded_name(fname);
} else {
f2fs_hash_filename(dir, fname);
}
--
2.34.1


2024-03-20 08:47:58

by Eugen Hristev

[permalink] [raw]
Subject: [PATCH v14 3/9] libfs: Introduce case-insensitive string comparison helper

From: Gabriel Krisman Bertazi <[email protected]>

generic_ci_match can be used by case-insensitive filesystems to compare
strings under lookup with dirents in a case-insensitive way. This
function is currently reimplemented by each filesystem supporting
casefolding, so this reduces code duplication in filesystem-specific
code.

Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
[[email protected]: rework to first test the exact match]
Signed-off-by: Eugen Hristev <[email protected]>
---
fs/libfs.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 4 +++
2 files changed, 81 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index c297953db948..711eecc125bd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1776,6 +1776,83 @@ static const struct dentry_operations generic_ci_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
#endif
};
+
+/**
+ * generic_ci_match() - Match a name (case-insensitively) with a dirent.
+ * This is a filesystem helper for comparison with directory entries.
+ * generic_ci_d_compare should be used in VFS' ->d_compare instead.
+ *
+ * @parent: Inode of the parent of the dirent under comparison
+ * @name: name under lookup.
+ * @folded_name: Optional pre-folded name under lookup
+ * @de_name: Dirent name.
+ * @de_name_len: dirent name length.
+ *
+ * Test whether a case-insensitive directory entry matches the filename
+ * being searched. If @folded_name is provided, it is used instead of
+ * recalculating the casefold of @name.
+ *
+ * Return: > 0 if the directory entry matches, 0 if it doesn't match, or
+ * < 0 on error.
+ */
+int generic_ci_match(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *folded_name,
+ const u8 *de_name, u32 de_name_len)
+{
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
+ struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
+ struct qstr dirent = QSTR_INIT(de_name, de_name_len);
+ int res = 0, match = 0;
+
+ if (IS_ENCRYPTED(parent)) {
+ const struct fscrypt_str encrypted_name =
+ FSTR_INIT((u8 *) de_name, de_name_len);
+
+ if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
+ return -EINVAL;
+
+ decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+ if (!decrypted_name.name)
+ return -ENOMEM;
+ res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
+ &decrypted_name);
+ if (res < 0)
+ goto out;
+ dirent.name = decrypted_name.name;
+ dirent.len = decrypted_name.len;
+ }
+
+ /*
+ * Attempt a case-sensitive match first. It is cheaper and
+ * should cover most lookups, including all the sane
+ * applications that expect a case-sensitive filesystem.
+ */
+ if (folded_name->name) {
+ if (dirent.len == folded_name->len &&
+ !memcmp(folded_name->name, dirent.name, dirent.len)) {
+ match = 1;
+ goto out;
+ }
+ res = utf8_strncasecmp_folded(um, folded_name, &dirent);
+ } else {
+ if (dirent.len == name->len &&
+ !memcmp(name->name, dirent.name, dirent.len) &&
+ (!sb_has_strict_encoding(sb) || !utf8_validate(um, name))) {
+ match = 1;
+ goto out;
+ }
+ res = utf8_strncasecmp(um, name, &dirent);
+ }
+
+out:
+ kfree(decrypted_name.name);
+ if (res < 0)
+ return res;
+ return (match || !res);
+}
+EXPORT_SYMBOL(generic_ci_match);
#endif

#ifdef CONFIG_FS_ENCRYPTION
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ff1338109b54..690b37e1db95 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3281,6 +3281,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);

extern void generic_set_sb_d_ops(struct super_block *sb);
+extern int generic_ci_match(const struct inode *parent,
+ const struct qstr *name,
+ const struct qstr *folded_name,
+ const u8 *de_name, u32 de_name_len);

static inline bool sb_has_encoding(const struct super_block *sb)
{
--
2.34.1