2023-06-21 08:53:46

by Jingbo Xu

[permalink] [raw]
Subject: [RFC 1/2] erofs: update on-disk format for xattr bloom filter

The xattr bloom filter feature is going to be introduced to speed up the
negative xattr lookup, e.g. system.posix_acl_[access|default] lookup
when running "ls -lR" workload.

The number of common used xattr (n) is approximately 8, including
system.[posix_acl_access|posix_acl_default], security.[capability|selinux]
and security.[SMACK64|SMACK64TRANSMUTE|SMACK64EXEC|SMACK64MMAP]. Given the
number of bits of the bloom filter (m) is 32, the optimal value for the
number of the hash functions (k) is 2 (ln2 * m/n = 2.7).

Signed-off-by: Jingbo Xu <[email protected]>
---
fs/erofs/erofs_fs.h | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 2c7b16e340fe..9daea86cdb52 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -13,6 +13,7 @@

#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_BLOOM 0x00000003

/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -200,7 +201,7 @@ struct erofs_inode_extended {
* for read-only fs, no need to introduce h_refcount
*/
struct erofs_xattr_ibody_header {
- __le32 h_reserved;
+ __le32 h_map; /* bloom filter, bit value 1 indicates not-present */
__u8 h_shared_count;
__u8 h_reserved2[7];
__le32 h_shared_xattrs[]; /* shared xattr id array */
@@ -221,6 +222,11 @@ struct erofs_xattr_ibody_header {
#define EROFS_XATTR_LONG_PREFIX 0x80
#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f

+#define EROFS_XATTR_BLOOM_BITS 32
+#define EROFS_XATTR_BLOOM_MASK (EROFS_XATTR_BLOOM_BITS - 1)
+#define EROFS_XATTR_BLOOM_DEFAULT UINT32_MAX
+#define EROFS_XATTR_BLOOM_COUNTS 2
+
/* xattr entry (for both inline & shared xattrs) */
struct erofs_xattr_entry {
__u8 e_name_len; /* length of name */
--
2.19.1.6.gb485710b



2023-06-27 02:38:12

by Gao Xiang

[permalink] [raw]
Subject: Re: [RFC 1/2] erofs: update on-disk format for xattr bloom filter



On 2023/6/21 16:32, Jingbo Xu wrote:
> The xattr bloom filter feature is going to be introduced to speed up the
> negative xattr lookup, e.g. system.posix_acl_[access|default] lookup
> when running "ls -lR" workload.
>
> The number of common used xattr (n) is approximately 8, including
> system.[posix_acl_access|posix_acl_default], security.[capability|selinux]
> and security.[SMACK64|SMACK64TRANSMUTE|SMACK64EXEC|SMACK64MMAP]. Given the
> number of bits of the bloom filter (m) is 32, the optimal value for the
> number of the hash functions (k) is 2 (ln2 * m/n = 2.7).
>
> Signed-off-by: Jingbo Xu <[email protected]>
> ---
> fs/erofs/erofs_fs.h | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
> index 2c7b16e340fe..9daea86cdb52 100644
> --- a/fs/erofs/erofs_fs.h
> +++ b/fs/erofs/erofs_fs.h
> @@ -13,6 +13,7 @@
>
> #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
> #define EROFS_FEATURE_COMPAT_MTIME 0x00000002
> +#define EROFS_FEATURE_COMPAT_XATTR_BLOOM 0x00000003

#define EROFS_FEATURE_COMPAT_XATTR_BLOOM 0x00000004

>
> /*
> * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
> @@ -200,7 +201,7 @@ struct erofs_inode_extended {
> * for read-only fs, no need to introduce h_refcount
> */
> struct erofs_xattr_ibody_header {
> - __le32 h_reserved;
> + __le32 h_map; /* bloom filter, bit value 1 indicates not-present */

`map` here is too ambiguous, could we rename it as "h_name_filter"?

> __u8 h_shared_count;
> __u8 h_reserved2[7];
> __le32 h_shared_xattrs[]; /* shared xattr id array */
> @@ -221,6 +222,11 @@ struct erofs_xattr_ibody_header {
> #define EROFS_XATTR_LONG_PREFIX 0x80
> #define EROFS_XATTR_LONG_PREFIX_MASK 0x7f
>
> +#define EROFS_XATTR_BLOOM_BITS 32
> +#define EROFS_XATTR_BLOOM_MASK (EROFS_XATTR_BLOOM_BITS - 1)
> +#define EROFS_XATTR_BLOOM_DEFAULT UINT32_MAX
> +#define EROFS_XATTR_BLOOM_COUNTS 2

could we rename them as EROFS_XATTR_NAME_FILTER_xxx?

Thanks,
Gao Xiang